diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 88df687bd6..8eede28705 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -38,7 +38,7 @@ jobs: CPU_MAC: runs-on: macos-latest env: - FC: gfortran-11 + FC: gfortran-14 # see #971 strategy: matrix: folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum, epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 index b90ef84b47..b64e42a22e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -1,8 +1,8 @@ diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -index 4fbb8e6ba..f9e2335de 100644 +index 4fbb8e6ba..d5accb9fb 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -@@ -484,23 +484,140 @@ C +@@ -484,23 +484,142 @@ C INTEGER VECSIZE_USED INTEGER IVEC @@ -40,7 +40,7 @@ index 4fbb8e6ba..f9e2335de 100644 + + IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) +#endif -+ call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 ++ call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO - DO IVEC=1, VECSIZE_USED @@ -67,7 +67,7 @@ index 4fbb8e6ba..f9e2335de 100644 + ENDDO !$OMP END DO !$OMP END PARALLEL -+ call counters_smatrix1multi_stop( -1 ) ! fortran=-1 ++ call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 +#ifdef MG5AMC_MEEXPORTER_CUDACPP + ENDIF + @@ -77,9 +77,10 @@ index 4fbb8e6ba..f9e2335de 100644 + STOP + ENDIF + IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) ++ call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ++ & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities + FIRST = .FALSE. +c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) + IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -93,22 +94,23 @@ index 4fbb8e6ba..f9e2335de 100644 + ENDIF + WRITE (6,*) 'NGOODHEL =', NGOODHEL + WRITE (6,*) 'NCOMB =', NCOMB ++ call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 + ENDIF -+ call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 ++ call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 + IF ( .NOT. MULTI_CHANNEL ) THEN + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ++ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities + ELSE + IF( SDE_STRAT.NE.1 ) THEN + WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy + STOP + ENDIF -+ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ++ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled + & HEL_RAND, COL_RAND, CHANNEL, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled ++ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities + ENDIF -+ call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 ++ call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 + ENDIF + + IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) @@ -284,7 +286,7 @@ index 1124a9164..27a6e4674 100644 open(unit=lun,file=tempname,status='old',ERR=20) fopened=.true. diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -index e73e654d4..27fbe7302 100644 +index e73e654d4..3072054f2 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -72,7 +72,10 @@ C @@ -299,15 +301,7 @@ index e73e654d4..27fbe7302 100644 C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of -@@ -140,6 +143,7 @@ C ---------- - C BEGIN CODE - C ---------- - -+ call counters_smatrix1_start() - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 - DO I=1,NEXTERNAL -@@ -217,6 +221,17 @@ C ---------- +@@ -217,6 +220,17 @@ C ---------- ENDIF IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) @@ -325,22 +319,3 @@ index e73e654d4..27fbe7302 100644 ENDIF ENDIF ELSE IF (.NOT.INIT_MODE) THEN ! random helicity -@@ -234,6 +249,7 @@ C Include the Jacobian from helicity sampling - IHEL = HEL_PICKED - ELSE - ANS = 1D0 -+ call counters_smatrix1_stop() - RETURN - ENDIF - IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN -@@ -278,9 +294,8 @@ C Set right sign for ANS, based on sign of chosen helicity - ENDIF - ENDIF - ANS=ANS/DBLE(IDEN) -- - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) -- -+ call counters_smatrix1_stop() - END - - diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index a052631aa9..78512a5eeb 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly 
quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge<FORTRANFPTYPE>* pbridge = dynamic_cast<Bridge<FORTRANFPTYPE>*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f index 5bbeefbb58..fb942500a5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 012cbdf6a0..ca7decaa37 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -331,6 +331,7 @@ function codeGenAndDiff() | awk -vdate="D:20240301000000+01'00'" '{print gensub("(^/ModDate\\().*(\\)>>endobj$)","\\1"date"\\2","g")}' \ | awk -vdate="D:20240301000000+01'00'" '{print gensub("(^/CreationDate\\().*(\\)$)","\\1"date"\\2","g")}' \ | awk -vid="0123456789abcdef0123456789abcdef" '{print gensub("(^/ID \\[<).*><.*(>\\]$)","\\1"id"><"id"\\2","g")}' \ + | awk -vid="0123456789abcdef0123456789abcdef" '{print gensub("(^/ID \\[\\().*\\)\\(.*(\\)\\]$)","\\1"id")("id"\\2","g")}' \ | awk -vdate="2024-03-01T00:00:00+01:00" '{print gensub("().*()","\\1"date"\\2","g")}' \ | awk -vdate="2024-03-01T00:00:00+01:00" '{print gensub("().*()","\\1"date"\\2","g")}' \ | awk -vuuid="'uuid=01234567-89ab-cdef-0123-456789abcdef'" '{print gensub("(xapMM:DocumentID=).*(/>$)","\\1"uuid"\\2","g")}' \ diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 5982c61ae8..f059e68f5e 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005379676818847656  +DEBUG: model prefixing takes 0.005307912826538086  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -198,18 +198,18 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1545]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.114 s +Wrote files for 8 helas calls in 0.112 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.201 s +ALOHA: aloha creates 3 routines in 0.198 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.255 s +ALOHA: aloha creates 7 routines in 0.253 s FFV1 FFV1 FFV2 @@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.118s -user 0m1.862s -sys 0m0.242s +real 0m2.067s +user 0m1.807s +sys 0m0.251s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 38978865ff..ef45890e25 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -528,7 +528,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -544,7 +544,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! 
fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -554,9 +554,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -570,22 +571,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index f1b5fc0e1a..c2a8b78ed6 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -267,7 +266,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -312,8 +310,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", 
overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? 
+ override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? 
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge<FORTRANFPTYPE>* pbridge = dynamic_cast<Bridge<FORTRANFPTYPE>*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index ae0e225418..a96bc91d5b 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00564265251159668  +DEBUG: model prefixing takes 0.005346059799194336  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,13 +177,13 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.271 s +ALOHA: aloha creates 4 routines in 0.264 s FFV1 FFV1 FFV2 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.674s -user 0m0.596s -sys 0m0.058s +real 0m0.647s +user 0m0.592s +sys 0m0.048s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index d3614c325f..b7616fe096 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005547761917114258  +DEBUG: model prefixing takes 0.005777120590209961  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -198,15 +198,15 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1545]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.117 s +Wrote files for 10 helas calls in 0.115 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.146 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.132 s VVV1 FFV1 FFV1 @@ -241,9 +241,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.050s -user 0m1.662s -sys 0m0.268s +real 0m1.927s +user 0m1.671s +sys 0m0.252s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index f9e2335de4..d5accb9fb2 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 27fbe7302c..3072054f2d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 5f921c39c6..b84f753a35 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005394935607910156  +DEBUG: model prefixing takes 0.005595207214355469  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -182,7 +182,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.144 s VVV1 FFV1 FFV1 @@ -197,7 +197,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.549s -user 0m0.474s -sys 0m0.058s -Code generation completed in 0 seconds +real 0m0.556s +user 0m0.475s +sys 0m0.048s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? 
+ override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? 
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 2ea2a5346a..7fabd11d28 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055332183837890625  +DEBUG: model prefixing takes 0.005646228790283203  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -188,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -209,7 +209,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1545]  INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -228,22 +228,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1545]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.281 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s +Wrote files for 46 helas calls in 0.275 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.331 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.315 s VVV1 VVV1 FFV1 @@ -291,10 +291,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.722s -user 0m2.407s -sys 0m0.292s -Code generation completed in 4 seconds +real 0m2.676s +user 0m2.362s +sys 0m0.310s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index f9e2335de4..d5accb9fb2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! 
fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! 
cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index 27fbe7302c..3072054f2d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 29cee23b2e..3b6a3f178d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index b13c503fae..1dd3491413 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -159,7 +159,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -265,7 +264,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -310,8 +308,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index dc2276a50d..18b1d80415 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005425453186035156  +DEBUG: model prefixing takes 0.005260467529296875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -198,21 +198,21 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1545]  Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.165 s +Wrote files for 36 helas calls in 0.162 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.322 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -256,9 +256,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.506s -user 0m2.207s -sys 0m0.271s +real 0m2.483s +user 0m2.197s +sys 0m0.283s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c9ca1538d3..1c3ba92e6d 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! 
fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 3d035277eb..6fdf8a8d07 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -159,7 +159,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -265,7 +264,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -310,8 +308,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", 
overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? 
+ override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? 
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 433938fa3c..a103152d0f 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005648612976074219  +DEBUG: model prefixing takes 0.00570988655090332  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.323 s VVV1 VVV1 FFV1 @@ -205,7 +205,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.818s -user 0m0.725s -sys 0m0.053s +real 0m0.774s +user 0m0.711s +sys 0m0.055s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 8412f20e64..816c1d75f7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055027008056640625  +DEBUG: model prefixing takes 0.0055654048919677734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.160 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -197,22 +197,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 
105} [model_handling.py at line 1545]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s -Wrote files for 222 helas calls in 0.712 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.428 s +Wrote files for 222 helas calls in 0.706 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.333 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.319 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.929s -user 0m3.539s -sys 0m0.294s +real 0m3.822s +user 0m3.543s +sys 0m0.260s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 208149fcf6..ddc480ec63 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index 0413417a30..fdcc390db4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -191,7 +191,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -297,7 +296,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -342,8 +340,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index ec446c348d..5c8b6b0535 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005393266677856445  +DEBUG: model prefixing takes 0.0053234100341796875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.158 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.429 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.322 s VVV1 VVV1 FFV1 @@ -208,7 +208,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.690s -user 0m1.392s -sys 0m0.051s -Code generation completed in 2 seconds +real 0m1.496s +user 0m1.376s +sys 0m0.058s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 80b849a95d..cf81051351 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005666971206665039  +DEBUG: model prefixing takes 0.005418062210083008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.906 s +1 processes with 1240 diagrams generated in 1.889 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -180,7 +180,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -199,22 +199,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 
216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 
460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 
695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 
932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 
43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 
292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 
527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 
761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 
852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1545]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.619 s -Wrote files for 2281 helas calls in 18.549 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.527 s +Wrote files for 2281 helas calls in 18.453 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.318 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.369 s +ALOHA: aloha creates 10 routines in 0.355 s VVV1 VVV1 FFV1 @@ -261,9 +261,9 @@ Type "launch" to generate events 
from this process, or see Run "open index.html" to see more information about this process. quit -real 0m32.883s -user 0m32.292s -sys 0m0.459s +real 0m32.580s +user 0m32.015s +sys 0m0.455s Code generation completed in 33 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 7c94a0776f..5f55c4daed 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f index 7722c3af16..870c890410 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index b9e6d3613f..aefbff4b80 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -255,7 +255,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -361,7 +360,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -406,8 +404,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", 
overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? 
+ override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? 
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 9fa53f086d..70ece972f5 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005432844161987305  +DEBUG: model prefixing takes 0.005778312683105469  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.902 s +1 processes with 1240 diagrams generated in 1.872 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -178,14 +178,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.640 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.585 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.354 s +ALOHA: aloha creates 5 routines in 0.348 s VVV1 VVV1 FFV1 @@ -208,7 +208,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.132s -user 0m12.955s -sys 0m0.111s -Code generation completed in 13 seconds +real 0m13.103s +user 0m12.928s +sys 0m0.109s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f index 7722c3af16..870c890410 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities?
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index f5c94e00cd..cb97eb9e35 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005633831024169922  +DEBUG: model prefixing takes 0.005686521530151367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.080 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -201,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -222,7 +222,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_ttxu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1545]  INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -241,7 +241,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1545]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s Wrote files for 32 helas calls in 0.249 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -250,7 +250,7 @@ ALOHA: aloha creates 2 routines in 0.146 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.134 s +ALOHA: aloha creates 4 routines in 0.133 s FFV1 FFV1 FFV1 @@ -302,10 +302,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.313s -user 0m1.990s -sys 0m0.293s -Code generation completed in 2 seconds +real 0m3.389s +user 0m1.964s +sys 0m0.295s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 2c11f53b89..3d7efb5585 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index b3c4ec75f6..c1fb026c9e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -281,7 +280,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -326,8 +324,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index d829a73049..d65bac7611 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 20ec98ad2f..bbe2b8626e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -281,7 +280,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -326,8 +324,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", 
overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? 
+ override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist?
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 96ced9fbc8..1548b0cef5 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056612491607666016  +DEBUG: model prefixing takes 0.005625486373901367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.080 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,11 +210,11 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.144 s FFV1 FFV1 FFV1 @@ -230,7 +230,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.656s -user 0m0.589s -sys 0m0.057s +real 0m0.659s +user 0m0.597s +sys 0m0.049s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) 
-ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist?
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 71b6f32fa3..d530a89960 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Creating files in directory P1_gg_bbx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -170,19 +170,19 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1545]  Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.118 s +Wrote files for 12 helas calls in 0.119 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.269 s +ALOHA: aloha creates 4 routines in 0.262 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.251 s +ALOHA: aloha creates 8 routines in 0.249 s VVS3 VVV1 FFV1 @@ -219,9 +219,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.198s -user 0m1.905s -sys 0m0.268s +real 0m3.154s +user 0m1.883s +sys 0m0.276s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f index d2b257590d..b8bcf54554 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! 
fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f index 9ae8713f43..5c1baf8703 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : 
%9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? 
+ override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? + override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist?
(see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index b38ca5ac91..14cb5a6988 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -156,7 +156,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.264 s +ALOHA: aloha creates 4 routines in 0.278 s VVS3 VVV1 FFV1 @@ -173,7 +173,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.666s -user 0m0.585s -sys 0m0.056s +real 0m0.756s +user 0m0.610s +sys 0m0.064s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 439cf73e6a..c6b7a90b66 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005627632141113281  +DEBUG: model prefixing takes 0.00522923469543457  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.137 s +13 processes with 76 diagrams generated in 0.135 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.816 s +65 processes with 1119 diagrams generated in 1.855 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -500,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -521,7 +521,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1545]  INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: 
Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -542,7 +542,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxuux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1545]  INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -563,7 +563,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_ttxgu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1545]  INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -584,7 +584,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxgux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -605,7 +605,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttxgg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1545]  INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -626,7 +626,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -647,7 +647,7 @@ INFO: Finding symmetric diagrams for subprocess group uu_ttxuu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -668,7 +668,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttxuux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -689,7 +689,7 @@ INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -710,7 +710,7 @@ INFO: Finding symmetric diagrams for subprocess group uc_ttxuc DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -731,7 +731,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttxccx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1545]  INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1545]  INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -773,7 +773,7 @@ INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1545]  INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -794,7 +794,7 @@ INFO: Finding symmetric diagrams for subprocess group gu_ttxu DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1545]  INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -815,7 +815,7 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1545]  INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -836,7 +836,7 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttxg DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1545]  INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -857,7 +857,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1545]  INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -876,22 +876,22 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1545]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.297 s -Wrote files for 810 helas calls in 4.490 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.293 s +Wrote files for 810 helas calls in 3.534 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.335 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.318 s +ALOHA: aloha creates 10 routines in 0.315 s VVV1 VVV1 FFV1 @@ -1100,10 +1100,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m12.229s -user 0m10.360s -sys 0m0.958s -Code generation completed in 13 seconds +real 0m11.245s +user 0m10.299s +sys 0m0.899s +Code generation completed in 12 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index 7bd8ec493e..c08c7c485d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! 
fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! 
cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index b1f45c3af7..a912a12c0f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index c4e476d6c0..868a3ef6c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index 8d74ac5b98..d30687b866 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -146,7 +146,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -252,7 +251,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -297,8 +295,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c9ca1538d3..1c3ba92e6d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 3d035277eb..6fdf8a8d07 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -159,7 +159,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -265,7 +264,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -310,8 +308,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 2c11f53b89..3d7efb5585 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index 0a318e1c05..259aaec8a1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -268,7 +267,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -313,8 +311,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index d829a73049..d65bac7611 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index f012b48d83..f85cd82256 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -268,7 +267,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -313,8 +311,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index 0eb22610bf..89f360f028 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index 3d2319b36a..2f6c72fb43 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -268,7 +267,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -313,8 +311,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 6a17e242b2..85dd15d507 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 926b17aa45..2d877b9bc0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -191,7 +191,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -297,7 +296,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -342,8 +340,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index a952958df8..0717127ecc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -549,7 +549,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -565,7 +565,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -575,9 +575,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -591,22 +592,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 520aaec0b1..74f9ed957c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index a41c6f876a..78a109f493 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index f77bfa066c..07469eded9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 700cdbece2..e40cd6c43f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 4c36b4bcce..a72674b621 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index bc898ac10e..7648cf57b1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -603,7 +603,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -613,9 +613,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -629,22 +630,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index eec298dc6c..1ea1b00778 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -196,7 +196,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -302,7 +301,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -347,8 +345,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 3db88ba2c3..deb87c2e1c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -659,7 +659,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -675,7 +675,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -685,9 +685,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -701,22 +702,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index a530c382f1..62460f03a4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -202,7 +202,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -308,7 +307,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -353,8 +351,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 8988ba6c1d..bd3cb3fcff 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index f6d8294bd3..e4c318e9f7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index 37b6741d5b..ac61617b61 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -659,7 +659,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -675,7 +675,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -685,9 +685,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -701,22 +702,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index 4b974a1e79..b2be8a2661 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -202,7 +202,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -308,7 +307,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -353,8 +351,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index 4f5f2bb65a..f0bf648d9b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index 3c33819612..ab5c2f5dcc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index 598e4f55b8..e7b63d08c4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 485ad633d3..db949d4977 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index dd3cd5c8a4..765f218d09 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! 
fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -603,7 +603,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -613,9 +613,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -629,22 +630,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! 
multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 16d80c44b6..f921e966b9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -196,7 +196,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -302,7 +301,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -347,8 +345,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index ef5dde5b56..8284af5cac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 5510afb41e..c0df727705 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index fe284c1cc5..d55f30f145 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.13704657554626465  +DEBUG: model prefixing takes 0.13804030418395996  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.722 s +1 processes with 72 diagrams generated in 3.673 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -115,7 +115,7 @@ INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 INFO: Creating files in directory P1_gg_ttxttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -134,22 +134,22 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1520]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1545]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.187 s -Wrote files for 119 helas calls in 0.437 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.185 s +Wrote files for 119 helas calls in 0.432 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.317 s ALOHA: aloha starts to 
compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.329 s +ALOHA: aloha creates 10 routines in 0.333 s VVV5 VVV5 FFV1 @@ -193,10 +193,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m7.228s -user 0m6.896s -sys 0m0.299s -Code generation completed in 8 seconds +real 0m7.220s +user 0m6.848s +sys 0m0.283s +Code generation completed in 7 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f index 86efacfe7f..461cfa8224 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! 
cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f index 4d5cb63761..d96ba556c5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f @@ -191,7 +191,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -297,7 +296,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -342,8 +340,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities?
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 62d7042d00..4fb7228286 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -77,7 +77,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.13954997062683105  +DEBUG: model prefixing takes 0.13859224319458008  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,7 +92,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.727 s +1 processes with 72 diagrams generated in 3.821 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -122,7 +122,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.316 s VVV5 VVV5 FFV1 @@ -142,7 +142,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.137s -user 0m5.030s -sys 0m0.072s +real 0m5.206s +user 0m5.107s +sys 0m0.076s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities?
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 37089500b4..49e61427c5 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.125 s +1 processes with 6 diagrams generated in 0.130 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 INFO: Creating files in directory P1_gg_t1t1x DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,7 +597,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1545]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.125 s +Wrote files for 16 helas calls in 0.126 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines @@ -607,7 +607,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.180 s +ALOHA: aloha creates 6 routines in 0.182 s VVV1 VSS1 VSS1 @@ -647,10 +647,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.044s -user 0m2.726s -sys 0m0.300s -Code generation completed in 3 seconds +real 0m3.278s +user 0m2.733s +sys 0m0.284s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f index 69a8372b3e..0170f78a25 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f index b1f74c86e4..bfb95cf2ee 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f @@ -131,7 +131,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -237,7 +236,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -282,8 +280,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities?
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index c2f899fe3e..1085728e17 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -582,7 +582,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.185 s +ALOHA: aloha creates 3 routines in 0.183 s VVV1 VSS1 VSS1 @@ -598,7 +598,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.342s -user 0m1.248s -sys 0m0.080s +real 0m1.401s +user 0m1.286s +sys 0m0.057s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities?
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 4f86b653e0..a1082c61f1 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -577,7 +577,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1152]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -597,15 +597,15 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1544]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1545]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.119 s +Wrote files for 10 helas calls in 0.116 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.137 s +ALOHA: aloha creates 2 routines in 0.139 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.136 s +ALOHA: aloha creates 4 routines in 0.135 s VVV1 FFV1 FFV1 @@ -640,10 +640,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.851s -user 0m2.590s -sys 0m0.316s -Code generation completed in 4 seconds +real 0m2.872s +user 0m2.564s +sys 0m0.301s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index f9e2335de4..d5accb9fb2 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? 
disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index bc79ed4217..aa332cd578 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + 
smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx3/nvToolsExt.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities?
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 45e10ca3ac..8479028997 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -554,7 +554,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.122 s +1 processes with 3 diagrams generated in 0.121 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -581,7 +581,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.137 s +ALOHA: aloha creates 2 routines in 0.136 s VVV1 FFV1 FFV1 @@ -596,7 +596,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.308s -user 0m1.215s -sys 0m0.063s +real 0m1.278s +user 0m1.188s +sys 0m0.072s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk index 359f16c029..9cff5e1a60 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk @@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %/bin/hipcc,%,$(shell which hipcc 2>/dev/null)) -# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists -# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) -ifneq ($(CUDA_HOME),) - USE_NVTX ?=-DUSE_NVTX - CUDA_INC = -I$(CUDA_HOME)/include/ +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965) +ifeq ($(CUDA_HOME),) + # CUDA_HOME is empty (nvcc not found) + override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/),) + # CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist? + override CUDA_INC= else + CUDA_INC = -I$(CUDA_HOME)/include/ +endif +###$(info CUDA_INC=$(CUDA_INC)) + +# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965) +ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist override USE_NVTX= - override CUDA_INC= +else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),) + # $(CUDA_HOME)/include/ exists but NVTX headers do not exist? 
+ override USE_NVTX= +else + # $(CUDA_HOME)/include/nvtx.h exists: use NVTX + # (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed) + override USE_NVTX=-DUSE_NVTX endif +###$(info USE_NVTX=$(USE_NVTX)) # NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) # - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. @@ -424,13 +440,18 @@ endif # (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) ifeq ($(HASCURAND),) ifeq ($(GPUCC),) # CPU-only build - ifneq ($(CUDA_HOME),) + ifeq ($(CUDA_INC),) + # $(CUDA_HOME)/include/ does not exist (see #965) + override HASCURAND = hasNoCurand + else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),) + # $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965) + override HASCURAND = hasNoCurand + else # By default, assume that curand is installed if a CUDA installation exists override HASCURAND = hasCurand - else - override HASCURAND = hasNoCurand endif else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + # By default, assume that curand is installed if a CUDA build is requested override HASCURAND = hasCurand else # non-Nvidia GPU build override HASCURAND = hasNoCurand diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index b8847b7cb6..01107f564b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:24 +DATE: 2024-08-08_20:42:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7267s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7180s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6868s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1693s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - 
[COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2837s - [COUNTERS] Fortran MEs ( 1 ) : 0.0868s for 90112 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s + [COUNTERS] Fortran MEs ( 1 ) : 0.0835s for 90112 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 90112 events => throughput is 1.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167196e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.181726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165900e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1799s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2882s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3353s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s 
for 90112 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.930086e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918558e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984749e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.023579e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3219s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2881s - [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0338s for 90112 events => throughput is 2.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2928s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539184e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.640473e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.709927e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.831088e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1781s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 
[9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2932s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 90112 events => throughput is 2.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.635946e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.678759e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.766552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813366e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1692s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec 
to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3421s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3007s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 90112 events => throughput is 2.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 90112 events => throughput is 2.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.124836e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.108602e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.174846e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.253882e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6131s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6096s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.6084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.32E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.79E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.749639e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.377977e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.937376e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.939853e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.647566e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.088090e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) 
-p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.462558e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.478718e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.641748e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.243737e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.002002e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.989285e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.659137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.238682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.136180e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131222e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 587bb76d73..617aae1ec8 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,17 +1,17 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:40 +DATE: 2024-08-08_20:43:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7250s 
- [COUNTERS] Fortran Overhead ( 0 ) : 0.7166s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7175s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1878s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1797s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s - [COUNTERS] Fortran MEs ( 1 ) : 0.0933s for 90112 events => throughput is 9.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3018s + [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 
1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1981s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1911s + [COUNTERS] PROGRAM TOTAL : 0.1866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1794s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3643s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0761s for 90112 events => throughput is 1.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.200646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.232262e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234403e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s 
for 90112 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.187376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119755e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.220665e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282267e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2872s - [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0254s for 90112 events => throughput is 3.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3317s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 90112 events => throughput is 3.26E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488004e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.481016e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.612737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570800e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 
[9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2873s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 90112 events => throughput is 3.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3314s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.33E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.702442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.644439e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.804759e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697078e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec 
to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3213s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 90112 events => throughput is 3.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3372s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.323941e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.387501e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761942e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.616268e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5917s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5912s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6084s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.6073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.92E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7292s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7238s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122217e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.601368e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.609665e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718163e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.582586e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.633474e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) 
-p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.902910e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898384e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.573732e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.829286e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.085324e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.104797e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.048658e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012752e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.737473e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.802072e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index f580fe1044..e51bbf394d 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:57 +DATE: 2024-08-08_20:43:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6742s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6906s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,8 +83,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] PROGRAM TOTAL : 0.1791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2832s - [COUNTERS] Fortran MEs ( 1 ) : 
0.0875s for 90112 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2869s + [COUNTERS] Fortran MEs ( 1 ) : 0.0825s for 90112 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0787s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0792s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.124575e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.142626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154252e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2926s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 90112 events => throughput is 2.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3336s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s 
for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964919e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982594e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.051752e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052848e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s - [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 90112 events => throughput is 2.51E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534941e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.552156e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.720296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.649390e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1827s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1744s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 
[9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0359s for 90112 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2876s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0337s for 90112 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.692433e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.650509e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.719714e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec 
to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3066s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0416s for 90112 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3264s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 90112 events => throughput is 2.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.207219e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.257287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300574e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5992s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.5980s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.38E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7142s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.80E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.699826e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.054665e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.973881e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970842e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.642059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.242307e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) 
-p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.500425e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.491734e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.034916e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.221256e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.050339e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.104459e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.807602e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.208981e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156854e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.160987e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index ccecc02825..8d24f348d7 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -3,16 +3,16 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:16:14 +DATE: 2024-08-08_20:43:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] 
fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8426s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7989s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8083s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7667s + [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s - [COUNTERS] Fortran MEs ( 1 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3777s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3588s - [COUNTERS] Fortran MEs ( 1 ) : 0.4798s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7491s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2980s + [COUNTERS] Fortran MEs ( 1 ) : 0.4511s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section 
= 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.9929s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5158s for 90112 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4811s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.819871e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.879822e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.838165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903748e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6401s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2894s for 90112 events => throughput is 3.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3004s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2709s for 90112 events => 
throughput is 3.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.138027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310019e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.282175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.203674e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4390s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 8192 events => throughput is 5.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3263s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1808s for 90112 events => 
throughput is 4.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4759s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3059s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1696s for 90112 events => throughput is 5.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.258519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223657e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.342811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.200982e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 
events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 90112 events => throughput is 5.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4542s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1516s for 90112 events => throughput is 5.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.840330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.865744e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.907730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.035557e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4098s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' 
./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4701s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2321s for 90112 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5428s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.585624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.669812e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.722456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.898434e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => 
throughput is 1.27E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7129s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7060s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7231s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.914318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008892e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.613535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.654647e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.870641e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331472e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.085338e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082448e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.804787e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310542e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159663e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160861e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.702330e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331806e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.067580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.063253e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index f46b75eef7..420861126b 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:16:41 +DATE: 2024-08-08_20:44:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7371s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7604s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s - [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4215s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3800s + [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 
1.6995s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2512s - [COUNTERS] Fortran MEs ( 1 ) : 0.4483s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7567s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3058s + [COUNTERS] Fortran MEs ( 1 ) : 0.4510s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094179692708323] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688388783328] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4430s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7678s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4582s for 90112 events => throughput is 1.97E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.030986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984608e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.985412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996032e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094175707109216] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3923s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684583433771] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4062s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2257s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 90112 events => throughput is 4.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4893s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1837s for 90112 events => 
throughput is 4.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.724911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.831484e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.762208e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.765454e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0960s for 90112 events => 
throughput is 9.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4091s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3116s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0972s for 90112 events => throughput is 9.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.106025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.995090e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.235160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.148417e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3807s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 [UNWEIGHT] Wrote 1744 
events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3961s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0917s for 90112 events => throughput is 9.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.917882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.994646e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.709952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.882184e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094178448427996] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3637s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0110s for 8192 events => throughput is 7.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' 
./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688391432061] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3429s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1229s for 90112 events => throughput is 7.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5017s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1356s for 90112 events => throughput is 6.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.804453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.837763e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.929498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925566e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184162782994] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8112s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => 
throughput is 1.43E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105694501043516] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6604s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6548s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 90112 events => throughput is 1.61E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 90112 events => throughput is 1.58E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.908595e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.085941e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.266019e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.178660e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.049207e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.983696e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.416476e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.406286e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.062460e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010543e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.551978e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536473e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.413712e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.527299e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.519394e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.475317e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index dc1bcf4827..65f004f30e 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -3,20 +3,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:17:06 +DATE: 2024-08-08_20:44:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7372s - [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8115s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3534s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4214s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3805s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 
1.7153s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2615s - [COUNTERS] Fortran MEs ( 1 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7670s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3128s + [COUNTERS] Fortran MEs ( 1 ) : 0.4542s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3938s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4777s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7889s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3008s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4877s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.883629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863098e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876650e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3795s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4880s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2602s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5750s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2680s for 90112 events => 
throughput is 3.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.317423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334875e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.373113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372227e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3732s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3946s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1627s for 90112 events => 
throughput is 5.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4696s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1658s for 90112 events => throughput is 5.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.391740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223051e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.381923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.767945e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 
events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3634s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2153s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1482s for 90112 events => throughput is 6.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4595s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3077s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1514s for 90112 events => throughput is 5.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.907882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.889622e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.901678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.919078e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' 
./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2291s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5451s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2354s for 90112 events => throughput is 3.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.830144e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737875e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.863403e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8029s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8014s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => 
throughput is 1.26E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7390s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.955290e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004360e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.610728e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.618155e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.721513e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.337805e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.057949e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064726e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718962e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321717e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.141622e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.720504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.487761e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.981768e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.948699e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index daad34ef63..c52a8af2f9 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:17:32 +DATE: 2024-08-08_20:45:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6619s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3441s - [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3666s + [COUNTERS] Fortran MEs ( 1 ) : 0.3221s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6512s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] Fortran MEs ( 1 ) : 0.3303s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3350s + [COUNTERS] Fortran MEs ( 1 ) : 0.3208s for 8192 events => throughput is 2.55E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM 
TOTAL : 5.0566s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4770s - [COUNTERS] Fortran MEs ( 1 ) : 3.5796s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1103s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5412s + [COUNTERS] Fortran MEs ( 1 ) : 3.5692s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3380s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3370s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.4802s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8183s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6619s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2687s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5495s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7180s for 90112 events => throughput is 2.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.542070e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.517328e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.548855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477316e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1750s for 8192 events => throughput is 4.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1801s for 8192 events => throughput is 4.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5748s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6476s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9272s for 90112 events => throughput is 4.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4936s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5370s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9559s for 90112 
events => throughput is 4.61E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.742087e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.723167e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.785084e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.710741e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0864s for 8192 events => throughput is 9.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4289s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0899s for 8192 events => throughput is 9.11E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5034s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9448s for 90112 
events => throughput is 9.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5644s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9765s for 90112 events => throughput is 9.23E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.300762e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.063994e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.558512e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113779e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0769s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4521s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3684s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 8192 events => throughput is 9.86E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 
[UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.4089s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5590s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8498s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4440s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5615s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8818s for 90112 events => throughput is 1.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056563e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.066565e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 8192 events => throughput is 7.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3385s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1106s for 8192 events => throughput is 7.41E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ 
Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8167s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6114s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2053s for 90112 events => throughput is 7.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7606s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5479s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2119s for 90112 events => throughput is 7.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.600195e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.524660e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.737653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.502357e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7530s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8444s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 
8192 events => throughput is 1.38E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9540s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9827s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.637288e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.212630e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.243124e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415553e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002014e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240897e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239487e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002136e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251024e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250655e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001900e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.766536e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.746731e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 51c84bcce7..b25cff31e4 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:18:15 +DATE: 2024-08-08_20:45:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] Fortran MEs ( 1 ) : 0.3203s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6879s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s + [COUNTERS] Fortran MEs ( 1 ) : 0.3221s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ 
Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6540s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3233s - [COUNTERS] Fortran MEs ( 1 ) : 0.3307s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3322s + [COUNTERS] Fortran MEs ( 1 ) : 0.3252s for 8192 events => throughput is 2.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0535s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4792s - [COUNTERS] Fortran MEs ( 1 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0903s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5245s + [COUNTERS] Fortran MEs ( 1 ) : 3.5658s for 90112 events => throughput is 2.53E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112722616246457] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9529s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6280s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3249s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6630s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3273s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare 
MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238468293717765E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7771s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9289s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8482s for 90112 events => throughput is 2.34E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1318s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5454s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5854s for 90112 events => throughput is 2.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.295265e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.562809e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.274937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.549301e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112720694019242] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1114s for 8192 events => throughput is 7.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4414s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0997s for 8192 events => throughput is 8.22E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238454783817719E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.9013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7217s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1796s for 90112 events => throughput is 7.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6571s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5548s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1018s for 90112 events => throughput is 8.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.824875e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.333170e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.970843e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.397937e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] 
Fortran Overhead ( 0 ) : 0.3870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 8192 events => throughput is 1.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.2132s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5293s for 90112 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0649s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5077s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.559003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821951e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.455805e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.834362e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0477s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3381s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4802s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0303s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4587s for 90112 
events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018262e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019326e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112723389095883] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 8192 events => throughput is 1.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0549s for 8192 events => throughput is 1.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238464413054557E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.3110s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6631s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6480s for 90112 
events => throughput is 1.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5295s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5889s for 90112 events => throughput is 1.53E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.404960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.561264e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.473914e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545662e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112725654777677] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7564s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7590s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7568s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238470908598507E-002] fbridge_mode=1 
[UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9333s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9227s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9627s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9510s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0105s for 90112 events => throughput is 8.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.129185e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151184e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.549775e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548948e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539449e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576425e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726001e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715469e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.546076e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585156e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761871e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.753005e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.365545e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.440113e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.281294e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.293588e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index b3a8db7192..b6592dfe65 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - - -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:18:54 +DATE: 2024-08-08_20:46:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3548s - [COUNTERS] Fortran MEs ( 1 ) : 0.3348s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] Fortran MEs ( 1 ) : 0.3227s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6450s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3225s - [COUNTERS] Fortran MEs ( 1 ) : 0.3225s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6641s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3385s + [COUNTERS] Fortran MEs ( 1 ) : 0.3256s for 8192 events => throughput is 2.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM 
TOTAL : 5.1061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5000s - [COUNTERS] Fortran MEs ( 1 ) : 3.6061s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5570s + [COUNTERS] Fortran MEs ( 1 ) : 3.6128s for 90112 events => throughput is 2.49E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3444s for 8192 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3338s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3416s for 8192 events => throughput is 2.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.6402s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8430s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7972s for 90112 events => throughput is 2.37E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5455s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7687s for 90112 events => throughput is 2.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.504502e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.463950e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.493736e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.478616e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1730s for 8192 events => throughput is 4.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1749s for 8192 events => throughput is 4.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5611s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6531s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9081s for 90112 events => throughput is 4.72E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4746s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5384s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9354s for 90112 
events => throughput is 4.66E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.885906e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.832626e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.454856e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.815562e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5422s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0965s for 8192 events => throughput is 8.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4266s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0865s for 8192 events => throughput is 9.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8283s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7674s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0609s for 90112 
events => throughput is 8.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4911s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5269s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9636s for 90112 events => throughput is 9.35E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.150394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.435081e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.706600e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.477580e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 8192 events => throughput is 8.79E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 
[UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8914s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3905s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5342s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8556s for 90112 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.036553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.087061e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088736e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1226s for 8192 events => throughput is 6.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4463s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3356s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1100s for 8192 events => throughput is 7.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ 
Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8307s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6135s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2171s for 90112 events => throughput is 7.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7724s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2297s for 90112 events => throughput is 7.33E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.036325e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.268797e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.017098e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.343356e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7682s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 
8192 events => throughput is 1.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9432s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9875s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9612s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.654166e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.092999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.808330e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.285885e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001990e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.235467e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235577e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.248772e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000218e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246161e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245999e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.996930e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.742147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.726284e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index a3214916d8..9f965c04b5 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,14 +13,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:19:38 +DATE: 2024-08-08_20:47:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2416s - [COUNTERS] Fortran MEs ( 1 ) : 4.1390s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2657s + [COUNTERS] Fortran MEs ( 1 ) : 4.2511s for 8192 events => throughput is 1.93E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4016s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2413s - [COUNTERS] Fortran MEs ( 1 ) : 4.1603s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2643s + [COUNTERS] Fortran MEs ( 1 ) : 4.2223s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] 
ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.5196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7600s - [COUNTERS] Fortran MEs ( 1 ) : 45.7596s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.4461s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8443s + [COUNTERS] Fortran MEs ( 1 ) : 46.6018s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.8038s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4979s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3060s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6404s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3690s for 8192 events => throughput is 1.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0096s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 53.4475s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9535s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.4939s for 90112 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 49.9380s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.1336s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ 
-181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922216e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926413e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959395e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935484e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.7615s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4839s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2777s for 8192 events => throughput is 3.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2606s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3472s for 8192 events => throughput is 3.49E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0046s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099785] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 30.6075s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0936s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5139s for 90112 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.5257s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8027s + [COUNTERS] CudaCpp MEs ( 2 ) : 
25.7180s for 90112 events => throughput is 3.50E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.496612e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.649842e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.539346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.636818e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.2183s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9937s for 8192 events => throughput is 8.24E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.2653s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2598s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0030s for 8192 events => throughput is 8.17E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 14.0676s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7785s - [COUNTERS] CudaCpp MEs ( 2 ) : 
11.2891s for 90112 events => throughput is 7.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.8598s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7908s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.0665s for 90112 events => throughput is 8.14E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.419384e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.344831e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.678823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.416676e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.9878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8816s for 8192 events => throughput is 9.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1673s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9051s for 8192 events => throughput is 9.05E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] 
fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6208s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7754s for 90112 events => throughput is 9.22E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.7872s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8132s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.9717s for 90112 events => throughput is 9.04E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.529916e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.472083e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.344439e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.534343e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.4488s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3506s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0982s for 8192 events => throughput is 7.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.3936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1317s for 8192 events => throughput is 7.24E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 
+479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 15.0103s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8885s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1218s for 90112 events => throughput is 7.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.2691s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8171s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.4493s for 90112 events => throughput is 7.24E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.477284e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.935643e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.503375e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.348983e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222225] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0326s for 8192 events => throughput is 2.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6983s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0363s for 8192 events => throughput is 2.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099782] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.6460s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3642s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6062s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3669s for 90112 events => throughput is 2.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0344s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.283334e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.290486e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.506901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.506388e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.134196e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183664e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177921e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.129278e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.181079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.155764e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.111113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126990e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.444874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.446377e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 565fe287ce..cd633f37c7 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:23:54 +DATE: 2024-08-08_20:51:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2420s - [COUNTERS] Fortran MEs ( 1 ) : 4.1560s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4959s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2635s + [COUNTERS] Fortran MEs ( 1 ) : 4.2323s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4132s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.2372s - [COUNTERS] Fortran MEs ( 1 ) : 4.1760s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2631s + [COUNTERS] Fortran MEs ( 1 ) : 4.2156s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.3387s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7498s - [COUNTERS] Fortran MEs ( 1 ) : 45.5889s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.4352s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8357s + [COUNTERS] Fortran MEs ( 1 ) : 46.5995s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320716615478996] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.4803s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1739s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2660s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2605s for 8192 events => throughput is 1.92E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558162567940870] fbridge_mode=1 [UNWEIGHT] 
Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 52.0102s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8677s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.1425s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.5468s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7982s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.7401s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015479e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996945e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.013117e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982014e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320708851010073] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.5680s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4066s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1614s for 8192 events => throughput is 7.05E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.4573s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2634s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1914s for 8192 events => throughput is 6.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' 
./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558157380141428] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 15.5194s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8862s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6332s for 90112 events => throughput is 7.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.6570s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7854s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8693s for 90112 events => throughput is 7.00E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.320236e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.255598e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.204448e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.246435e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.2285s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7290s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4995s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2587s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5137s for 8192 events => 
throughput is 1.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.7643s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2713s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4931s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.4672s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7991s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6666s for 90112 events => throughput is 1.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.674256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.606140e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.684230e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.576957e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.1706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4708s for 8192 events => 
throughput is 1.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7680s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4957s for 8192 events => throughput is 1.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.1094s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2054s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9040s for 90112 events => throughput is 1.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.7809s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7804s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9992s for 90112 events => throughput is 1.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.876397e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849666e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.833562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.858554e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320713685871445] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.4404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6053s for 8192 events => throughput is 1.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8187s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5571s for 8192 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558162184774774] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 8.3122s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3041s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0081s for 90112 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.9104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7899s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1190s for 90112 events 
=> throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.515129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496224e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.524953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504281e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320719394836651] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7253s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0242s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558167135091578] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.4757s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2273s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2484s for 90112 events 
=> throughput is 3.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4680s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1917s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2521s for 90112 events => throughput is 3.57E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0241s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.375249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382988e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.741898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717142e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.119355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.139748e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.307435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.304954e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.151709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.085623e+05 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.303009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.300454e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.049512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.130448e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.397157e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 93675b1fbf..27512be658 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:27:14 +DATE: 2024-08-08_20:54:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2409s - [COUNTERS] Fortran MEs ( 1 ) : 4.1327s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2619s + [COUNTERS] Fortran MEs ( 1 ) : 4.2081s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.3735s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2382s - [COUNTERS] Fortran MEs ( 1 ) : 4.1353s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4683s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s + [COUNTERS] Fortran MEs ( 1 ) : 4.2079s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.4052s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7410s - [COUNTERS] Fortran MEs ( 1 ) : 45.6641s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.3196s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8366s + [COUNTERS] Fortran MEs ( 1 ) : 46.4830s for 90112 
events => throughput is 1.94E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556893412546] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.8075s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4671s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3405s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2586s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4088s for 8192 events => throughput is 1.86E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083370546855] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 53.8850s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9787s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.9063s for 90112 events => throughput is 1.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.5724s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8031s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.7604s for 90112 events => throughput is 1.85E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.948359e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909521e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.951349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899981e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556780656974] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.7620s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4646s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2974s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5687s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2576s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3063s for 8192 events => throughput is 3.55E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083390630859] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 29.1849s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0005s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1844s for 90112 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.4318s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7915s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.6356s for 90112 
events => throughput is 3.52E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.700037e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.646364e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.594428e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.634455e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.2008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9900s for 8192 events => throughput is 8.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.2686s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0056s for 8192 events => throughput is 8.15E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 13.6123s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7227s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8896s for 90112 
events => throughput is 8.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.9032s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7920s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.1088s for 90112 events => throughput is 8.11E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.547846e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.153831e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.522651e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.410165e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.9517s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0869s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8850s for 8192 events => throughput is 9.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 
[UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.1428s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5992s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5436s for 90112 events => throughput is 9.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.5478s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7830s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.7625s for 90112 events => throughput is 9.23E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.818655e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.509937e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.752589e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.503575e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.4530s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1102s for 8192 events => throughput is 7.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.3881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1259s for 8192 events => throughput is 7.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ 
Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 16.4311s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0180s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4132s for 90112 events => throughput is 6.72E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.4378s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7995s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.6355s for 90112 events => throughput is 7.13E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.834157e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.378664e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.252552e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556665261842] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7919s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7589s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0360s for 
8192 events => throughput is 2.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0343s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083224243403] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3632s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5943s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1940s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3660s for 90112 events => throughput is 2.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0343s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.281497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292672e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.508846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513091e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.132768e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.151465e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.108754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.134281e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177596e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121179e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130147e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.453826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.451952e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index a3ac7b5c1f..dab5f736a0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -3,17 +3,17 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cpp512y - make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:32:46 +DATE: 2024-08-08_20:59:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross 
section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.2642s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5189s - [COUNTERS] Fortran MEs ( 1 ) : 100.7453s for 8192 events => throughput is 8.13E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.0811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5116s + [COUNTERS] Fortran MEs ( 1 ) : 101.5694s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 98.9549s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5002s - [COUNTERS] Fortran MEs ( 1 ) : 98.4548s for 8192 events => throughput is 8.32E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.0739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5163s + [COUNTERS] Fortran MEs ( 1 ) : 101.5576s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1078.9193s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3729s - [COUNTERS] Fortran MEs ( 1 ) : 1074.5464s for 90112 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1120.7697s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3745s + [COUNTERS] Fortran MEs ( 1 ) : 1116.3951s for 90112 events => throughput is 8.07E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 226.9636s - [COUNTERS] Fortran Overhead ( 0 ) : 104.1252s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.8384s for 8192 events => throughput is 6.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 122.6268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5175s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.9186s for 8192 events => throughput is 6.72E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1907s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1453.2216s - [COUNTERS] Fortran Overhead ( 0 ) : 108.8256s - [COUNTERS] CudaCpp MEs ( 2 ) : 1344.3960s for 90112 events => throughput is 6.70E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1388.7153s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3988s + [COUNTERS] CudaCpp MEs ( 2 ) : 1384.1234s for 90112 events => throughput is 6.51E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1931s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.957500e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.880201e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.966466e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.389775e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 110.0381s - [COUNTERS] Fortran Overhead ( 0 ) : 50.3711s - [COUNTERS] CudaCpp MEs ( 2 ) : 59.6670s for 8192 events => throughput is 1.37E+02 events/s + [COUNTERS] PROGRAM TOTAL : 60.8180s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5182s + [COUNTERS] CudaCpp MEs ( 2 ) : 60.1993s for 8192 events => throughput is 1.36E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 713.5237s - [COUNTERS] Fortran Overhead ( 0 ) : 54.3392s - [COUNTERS] CudaCpp MEs ( 2 ) : 659.1845s for 90112 events => throughput is 1.37E+02 events/s + [COUNTERS] PROGRAM TOTAL : 663.6261s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4076s + [COUNTERS] CudaCpp MEs ( 2 ) : 
659.1171s for 90112 events => throughput is 1.37E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1014s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.640648e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603881e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.642221e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.607115e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 51.6707s - [COUNTERS] Fortran Overhead ( 0 ) : 23.8142s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.8565s for 8192 events => throughput is 2.94E+02 events/s + [COUNTERS] PROGRAM TOTAL : 28.7968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5160s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.2344s for 8192 events => throughput is 2.90E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0464s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 333.1028s - [COUNTERS] Fortran Overhead ( 0 ) : 27.4583s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 305.6444s for 90112 events => throughput is 2.95E+02 events/s + [COUNTERS] PROGRAM TOTAL : 314.6312s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4324s + [COUNTERS] CudaCpp MEs ( 2 ) : 310.1525s for 90112 events => throughput is 2.91E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0464s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.542437e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.378917e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.558618e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496128e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.2154s - [COUNTERS] Fortran Overhead ( 0 ) : 20.7378s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4776s for 8192 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.3254s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.7644s for 8192 events => throughput is 3.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0408s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section 
= 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 293.9503s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5760s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.3742s for 90112 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 277.9808s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4083s + [COUNTERS] CudaCpp MEs ( 2 ) : 273.5305s for 90112 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0420s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.038194e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986386e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.054728e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.006448e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 46.6506s - [COUNTERS] Fortran Overhead ( 0 ) : 22.6220s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.0285s for 8192 events => throughput is 3.41E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.0869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5172s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5238s for 8192 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0459s *** (2-512z) Compare 
MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 291.3320s - [COUNTERS] Fortran Overhead ( 0 ) : 26.5806s - [COUNTERS] CudaCpp MEs ( 2 ) : 264.7513s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 271.0840s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3948s + [COUNTERS] CudaCpp MEs ( 2 ) : 266.6404s for 90112 events => throughput is 3.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0489s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.666698e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.641160e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681846e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.622116e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2374s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1562s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0812s for 8192 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.2426s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.0583s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0970s for 8192 events => throughput is 7.47E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0873s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 18.7608s - [COUNTERS] Fortran Overhead ( 0 ) : 6.8530s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9078s for 90112 events => throughput is 7.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.9203s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9107s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9249s for 90112 events => throughput is 7.56E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0847s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.502460e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.521131e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.309158e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.292650e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.222869e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.241733e+03 ) sec^-1 
*** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.577320e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.585186e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.238055e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.235154e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.437122e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.473644e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.224755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.236111e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231636e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235762e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a539e33f24..4ffdbee10a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_23:01:56 +DATE: 2024-08-08_22:23:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 97.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4946s - [COUNTERS] Fortran MEs ( 1 ) : 96.6914s for 8192 events => throughput is 8.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.3873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5075s + [COUNTERS] Fortran MEs ( 1 ) : 100.8798s for 8192 events => throughput is 8.12E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.7329s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4973s - [COUNTERS] Fortran MEs ( 1 ) : 96.2355s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.2416s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5135s + [COUNTERS] Fortran MEs ( 1 ) : 101.7281s for 8192 events => throughput is 8.05E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) 
- [COUNTERS] PROGRAM TOTAL : 1066.1378s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3607s - [COUNTERS] Fortran MEs ( 1 ) : 1061.7771s for 90112 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1114.7300s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3428s + [COUNTERS] Fortran MEs ( 1 ) : 1110.3872s for 90112 events => throughput is 8.12E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719945779552E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 204.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 94.7222s - [COUNTERS] CudaCpp MEs ( 2 ) : 109.8354s for 8192 events => throughput is 7.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.0089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5100s + [COUNTERS] CudaCpp MEs ( 2 ) : 110.3187s for 8192 events => throughput is 7.43E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1802s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +169,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326290777570335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1302.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 96.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 1205.8781s for 90112 events => throughput is 7.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1216.8479s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4035s + [COUNTERS] CudaCpp MEs ( 2 ) : 1212.2644s for 90112 events => throughput is 7.43E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1800s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.892403e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.795452e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.896902e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.783118e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,9 +214,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716994349971E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.4740s - [COUNTERS] Fortran Overhead ( 0 ) : 23.8797s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5944s for 8192 events => throughput is 3.08E+02 events/s + [COUNTERS] PROGRAM TOTAL : 27.4750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5164s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.9120s for 8192 events => throughput is 3.04E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0465s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -246,9 +249,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326284885505778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 322.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 27.6823s - [COUNTERS] CudaCpp MEs ( 2 ) : 294.4978s for 90112 events => throughput is 3.06E+02 events/s + [COUNTERS] PROGRAM TOTAL : 300.8248s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4082s + [COUNTERS] CudaCpp MEs ( 2 ) : 
296.3700s for 90112 events => throughput is 3.04E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0466s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518666e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.485944e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.484203e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.470723e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -290,9 +294,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 26.5623s - [COUNTERS] Fortran Overhead ( 0 ) : 12.2724s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.2899s for 8192 events => throughput is 5.73E+02 events/s + [COUNTERS] PROGRAM TOTAL : 14.5936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5183s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.0522s for 8192 events => throughput is 5.83E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0231s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -324,9 +329,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 174.5012s - [COUNTERS] Fortran Overhead ( 0 ) : 16.5646s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 157.9366s for 90112 events => throughput is 5.71E+02 events/s + [COUNTERS] PROGRAM TOTAL : 158.5014s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4348s + [COUNTERS] CudaCpp MEs ( 2 ) : 154.0430s for 90112 events => throughput is 5.85E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0236s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.701727e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.991558e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.718879e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.952358e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -368,9 +374,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 23.9391s - [COUNTERS] Fortran Overhead ( 0 ) : 11.1292s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8099s for 8192 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 12.8606s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5199s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3203s for 8192 events => throughput is 6.65E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0204s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -402,9 +409,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 
2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 155.8533s - [COUNTERS] Fortran Overhead ( 0 ) : 14.9625s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.8908s for 90112 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 139.5398s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3981s + [COUNTERS] CudaCpp MEs ( 2 ) : 135.1212s for 90112 events => throughput is 6.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0205s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.716591e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.890802e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.738216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.069181e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -446,9 +454,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719257109645E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.2840s - [COUNTERS] Fortran Overhead ( 0 ) : 12.4664s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8176s for 8192 events => throughput is 6.39E+02 events/s + [COUNTERS] PROGRAM TOTAL : 12.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5166s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2739s for 8192 events => throughput is 6.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0225s *** (2-512z) Compare 
MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -480,9 +489,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326283665697276E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 149.3053s - [COUNTERS] Fortran Overhead ( 0 ) : 16.1390s - [COUNTERS] CudaCpp MEs ( 2 ) : 133.1664s for 90112 events => throughput is 6.77E+02 events/s + [COUNTERS] PROGRAM TOTAL : 139.5916s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4260s + [COUNTERS] CudaCpp MEs ( 2 ) : 135.1428s for 90112 events => throughput is 6.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0228s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.340832e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.223008e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.256949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.135239e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -523,9 +533,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405721007137020E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5997s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0687s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5310s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.1089s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.0215s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5332s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5542s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -556,9 +567,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326295421688232E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.4660s - [COUNTERS] Fortran Overhead ( 0 ) : 5.7555s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7106s for 90112 events => throughput is 1.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.2844s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8851s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8421s for 90112 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5572s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -571,42 +583,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.530700e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.533878e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.545413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547825e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.163666e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147653e+04 ) sec^-1 
*** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.148478e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.124611e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.113035e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.134315e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.205309e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131039e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.129848e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139642e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016707e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.021489e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 78332de82a..e8248fddca 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -24,15 +24,15 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-06-29_00:09:11 +DATE: 2024-08-08_23:26:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.7022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4938s - [COUNTERS] Fortran MEs ( 1 ) : 96.2084s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 103.0122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5139s + [COUNTERS] Fortran MEs ( 1 ) : 102.4983s for 8192 events => throughput is 7.99E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.6669s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5001s - [COUNTERS] Fortran MEs ( 1 ) : 96.1668s for 8192 events => 
throughput is 8.52E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.2993s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5294s + [COUNTERS] Fortran MEs ( 1 ) : 100.7699s for 8192 events => throughput is 8.13E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1066.3936s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3326s - [COUNTERS] Fortran MEs ( 1 ) : 1062.0609s for 90112 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1118.7642s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3619s + [COUNTERS] Fortran MEs ( 1 ) : 1114.4022s for 90112 events => throughput is 8.09E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 227.5404s - [COUNTERS] Fortran Overhead ( 0 ) : 104.7327s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.8076s for 8192 events => throughput is 6.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 125.7885s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5193s + [COUNTERS] CudaCpp MEs ( 2 ) : 125.0621s for 8192 events => throughput is 6.55E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2071s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 
1460.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 108.1514s - [COUNTERS] CudaCpp MEs ( 2 ) : 1352.5253s for 90112 events => throughput is 6.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1322.8827s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3903s + [COUNTERS] CudaCpp MEs ( 2 ) : 1318.2870s for 90112 events => throughput is 6.84E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2054s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.953346e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.761597e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.887442e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.724704e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 113.0177s - [COUNTERS] Fortran Overhead ( 0 ) : 51.7429s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.2748s for 8192 events => throughput is 1.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 62.4510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5155s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.8333s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1022s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 722.6357s - [COUNTERS] Fortran Overhead ( 0 ) : 55.1190s - [COUNTERS] CudaCpp MEs ( 2 ) : 667.5167s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 684.8121s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4198s + [COUNTERS] CudaCpp MEs ( 2 ) : 680.2921s for 90112 events => throughput is 1.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.603521e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589042e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.594140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.588931e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 49.4491s - [COUNTERS] Fortran Overhead ( 0 ) : 22.5625s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8866s for 8192 events => throughput is 3.05E+02 events/s + [COUNTERS] PROGRAM TOTAL : 27.0092s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5181s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4459s for 8192 events => throughput is 
3.10E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0452s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 323.2367s - [COUNTERS] Fortran Overhead ( 0 ) : 26.2106s - [COUNTERS] CudaCpp MEs ( 2 ) : 297.0260s for 90112 events => throughput is 3.03E+02 events/s + [COUNTERS] PROGRAM TOTAL : 298.0409s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4173s + [COUNTERS] CudaCpp MEs ( 2 ) : 293.5790s for 90112 events => throughput is 3.07E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0445s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.746482e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.648206e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.754234e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625373e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 43.5060s - [COUNTERS] Fortran Overhead ( 0 ) : 19.6610s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.8450s for 8192 
events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 24.3540s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5168s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.7936s for 8192 events => throughput is 3.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 285.3812s - [COUNTERS] Fortran Overhead ( 0 ) : 23.3907s - [COUNTERS] CudaCpp MEs ( 2 ) : 261.9904s for 90112 events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 269.6777s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4164s + [COUNTERS] CudaCpp MEs ( 2 ) : 265.2234s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0378s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.296352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.285493e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.310764e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.289545e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 22.1706s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.4902s for 8192 events => throughput is 3.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.1227s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5145s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5642s for 8192 events => throughput is 3.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0441s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 283.0596s - [COUNTERS] Fortran Overhead ( 0 ) : 25.7915s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.2681s for 90112 events => throughput is 3.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 274.1583s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s + [COUNTERS] CudaCpp MEs ( 2 ) : 
269.6946s for 90112 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.790026e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625912e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.795378e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662510e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.5887s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8638s for 8192 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0261s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8763s for 8192 events => throughput is 9.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8694s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 15.9655s - [COUNTERS] Fortran Overhead ( 0 ) : 6.4579s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 9.5077s for 90112 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.2659s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8943s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5013s for 90112 events => throughput is 9.48E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8704s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.454304e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.434661e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092578e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089765e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107884e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112116e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159186e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160890e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108717e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.108390e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111609e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111312e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112293e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109990e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.645128e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638783e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 5750f0dd36..b877c26fea 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - make USEBUILDDIR=1 BACKEND=cuda + + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y - make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:31:29 +DATE: 2024-08-08_20:58:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s - [COUNTERS] Fortran MEs ( 1 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4051s + [COUNTERS] Fortran MEs ( 1 ) : 0.0703s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s - [COUNTERS] Fortran MEs ( 1 ) : 0.0748s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s + [COUNTERS] Fortran MEs ( 1 ) : 0.0708s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 
2.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5396s - [COUNTERS] Fortran MEs ( 1 ) : 0.8047s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3303s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5573s + [COUNTERS] Fortran MEs ( 1 ) : 0.7730s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0780s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4884s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8684s for 90112 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8384s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076463e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104999e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080050e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351262541] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4741s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0024s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4624s for 90112 events => 
throughput is 1.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.913652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937885e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.900215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972484e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3673s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3427s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2748s for 90112 events => 
throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8108s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2657s for 90112 events => throughput is 3.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.312529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384861e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.346229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.378583e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3456s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 
events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7983s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2454s for 90112 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7798s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2375s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465878e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.717666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.626688e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' 
./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6154s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3906s for 90112 events => throughput is 2.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8986s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3549s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.408273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.412835e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.448995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.491870e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.14E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7705s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => 
throughput is 1.03E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9715s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 90112 events => throughput is 1.06E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9737s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.482506e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555983e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.967685e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.037158e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.213762e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.629928e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.531197e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.566255e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.254372e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636845e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.843988e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.850724e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.244977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.619360e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.790736e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 19656e6368..8ac388b886 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,30 +1,30 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:31:59 +DATE: 2024-08-08_20:58:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4662s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3929s - [COUNTERS] Fortran MEs ( 1 ) : 0.0732s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4044s + [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4147s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3397s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s 
for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s + [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3268s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5225s - [COUNTERS] Fortran MEs ( 1 ) : 0.8043s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3245s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5525s + [COUNTERS] Fortran MEs ( 1 ) : 0.7719s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463158198617] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4778s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0729s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4137s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0712s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686347932190] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4140s - [COUNTERS] 
Fortran Overhead ( 0 ) : 1.6089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8051s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3233s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7851s for 90112 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.138677e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154270e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.117776e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459183868807] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 
1 [XSECTION] Cross section = 0.2151 [0.21510683073685827] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2923s for 90112 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8197s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2844s for 90112 events => throughput is 3.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.036160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998738e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.064103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.994620e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare 
MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1451s for 90112 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6873s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5442s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1427s for 90112 events => throughput is 6.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.245191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.110364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.259060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.231132e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 8192 events => throughput is 6.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3551s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1366s for 90112 events => throughput is 6.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1312s for 90112 events => throughput is 6.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.674563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.737889e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.747510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.863785e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,38 +437,179 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110464220032526] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3592s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0167s for 8192 events => throughput is 4.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110464220032526) differ by less than 4E-4 (2.771292368253242e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510685471570221] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.7199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5400s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1795s for 90112 events => throughput is 5.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685471570221) differ by less than 4E-4 (5.043963013928732e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.872478e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.938459e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110477321990667] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.31E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110477321990667) differ by less than 4E-4 (2.2880132283242816e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689318513457] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.9690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689318513457) differ by less than 4E-4 (1.2839907048700638e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.567743e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.424411e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.006580e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.460162e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.113271e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.506902e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.545880e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.393633e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** -Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. - -Backtrace for this error: -#0 0x7f2ce7e23860 in ??? -#1 0x7f2ce7e22a05 in ??? -#2 0x7f2ce7a54def in ??? -#3 0x7f2ce84b810a in ??? -#4 0x7f2ce80f2575 in ??? -#5 0x7f2ce84b4c89 in ??? -#6 0x7f2ce84bebfd in ??? -#7 0x7f2ce84c4491 in ??? -#8 0x4300eb in ??? -#9 0x431c70 in ??? -#10 0x432da7 in ??? -#11 0x433b7e in ??? -#12 0x44a9c1 in ??? -#13 0x42ebdf in ??? -#14 0x40371e in ??? -#15 0x7f2ce7a3feaf in ??? -#16 0x7f2ce7a3ff5f in ??? -#17 0x403844 in ??? -#18 0xffffffffffffffff in ??? -./madX.sh: line 389: 827445 Floating point exception(core dumped) $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. 
- Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index ce6f992dd2..25661e1063 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512y - make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:32:16 +DATE: 2024-08-08_20:59:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4634s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3913s - [COUNTERS] Fortran MEs ( 1 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4768s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s + [COUNTERS] Fortran MEs ( 1 ) : 0.0709s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4051s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3318s - [COUNTERS] Fortran MEs ( 1 ) : 0.0733s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3473s + [COUNTERS] Fortran MEs ( 1 ) : 0.0706s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] 
ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3247s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5253s - [COUNTERS] Fortran MEs ( 1 ) : 0.7994s for 90112 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3258s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5517s + [COUNTERS] Fortran MEs ( 1 ) : 0.7741s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539350666329] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0776s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0763s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560103207] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6001s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8491s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5373s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8282s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 
+183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091070e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.097593e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539350666335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560103204] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0520s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5829s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4690s for 90112 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9944s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5398s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4540s for 
90112 events => throughput is 1.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.888912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922053e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.990970e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3734s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3492s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8273s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5566s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2707s for 
90112 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8003s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2622s for 90112 events => throughput is 3.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.409995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424784e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.381232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455227e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3680s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 
[UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7902s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2428s for 90112 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7822s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5448s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2369s for 90112 events => throughput is 3.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.780633e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.843024e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.863761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.890496e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +437,6 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 @@ -438,9 +445,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4046s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 8192 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0362s for 8192 events => throughput is 2.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9365s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3788s for 90112 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9147s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3689s for 90112 events => throughput is 2.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +495,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.345816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300565e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.395812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.415614e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.17E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.09E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0084s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.10E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9688s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events 
=> throughput is 1.15E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.565914e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.010261e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.104681e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.473761e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636309e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.584931e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.555697e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303866e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.642280e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.831028e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.824016e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.325113e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.612307e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788376e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778614e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 46804abf09..9204db3db0 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/h make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:37:41 +DATE: 2024-08-09_00:48:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.8908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8436s - [COUNTERS] Fortran MEs ( 1 ) : 0.0472s for 8192 events => throughput is 1.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8671s + [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM 
TOTAL : 0.4057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3597s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3716s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s - [COUNTERS] Fortran MEs ( 1 ) : 0.5092s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2863s + [COUNTERS] Fortran MEs ( 1 ) : 0.5119s for 90112 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4491s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 
[UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8072s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5364s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8165s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2690s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5470s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.706986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.683813e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.699867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.668738e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 8192 events => throughput is 3.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3797s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ 
Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5505s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2580s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2925s for 90112 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2957s for 90112 events => throughput is 3.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034167e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.037815e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.993777e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.993910e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 
events => throughput is 5.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4149s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2377s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1772s for 90112 events => throughput is 5.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4641s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1835s for 90112 events => throughput is 4.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.046299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.902798e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.149674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.886099e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 
events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.3953s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2347s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1605s for 90112 events => throughput is 5.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4216s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1645s for 90112 events => throughput is 5.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.314068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.361206e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.459027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.494947e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2627s for 90112 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5023s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2627s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2391s for 90112 events => 
throughput is 3.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.445922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.615246e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662708e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7843s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 2.2160s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2094s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => 
throughput is 1.36E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7013s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6935s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.30E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.826997e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.844829e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.347084e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.285195e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.841221e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.255268e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.717455e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.760215e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827200e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235451e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.038893e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.844247e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241445e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.744588e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.725782e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index bbce3b7240..ae36851550 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:08 +DATE: 2024-08-09_00:49:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8605s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8922s + [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3728s + [COUNTERS] Fortran MEs ( 1 ) : 0.0475s for 8192 events => throughput is 1.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7057s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1971s - [COUNTERS] Fortran MEs ( 1 ) : 0.5086s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2854s + [COUNTERS] Fortran MEs ( 1 ) : 0.5133s for 90112 events 
=> throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162897355760356] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4180s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 2d49c9f52b..d90f539fcf 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:14 +DATE: 2024-08-09_00:49:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.8895s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8431s - [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8684s + [COUNTERS] Fortran MEs ( 1 ) : 0.0474s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s + [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** 
-------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7047s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1950s - [COUNTERS] Fortran MEs ( 1 ) : 0.5097s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8008s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2889s + [COUNTERS] Fortran MEs ( 1 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4229s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3736s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +169,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8098s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5351s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8077s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5452s for 90112 events => throughput is 1.65E+05 events/s 
+ [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -184,13 +186,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.605995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.584312e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.601314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.572139e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +216,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4000s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3717s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 8192 events => throughput is 2.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -248,9 +251,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5495s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.2534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2962s for 90112 events => throughput is 3.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6068s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3063s for 90112 events => throughput is 2.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -264,13 +268,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.929578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739519e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +298,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3912s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 8192 events => throughput is 4.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -328,9 +333,10 @@ INFO: The following 
Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1815s for 90112 events => throughput is 4.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4541s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1842s for 90112 events => throughput is 4.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -344,13 +350,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.830829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.846731e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.788599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.806331e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +380,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3903s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.3744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -408,9 +415,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4028s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 90112 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4306s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2629s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1673s for 90112 events => throughput is 5.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -424,13 +432,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.250961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.198253e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.207264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.334338e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +462,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] 
fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4086s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3841s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -488,9 +497,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4957s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2454s for 90112 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5232s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2514s for 90112 events => throughput is 3.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -504,13 +514,13 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.375382e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.405731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300552e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -533,9 +543,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7809s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -566,9 +577,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6630s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 
1.6979s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6904s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -581,42 +593,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.830803e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.835154e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.306249e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.144694e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.830251e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.230105e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.737584e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.705062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.843710e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235322e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = 
SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038788e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035545e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827828e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.242431e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.738262e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754474e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index cacd0f35d9..5562e4c07e 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:40:12 +DATE: 2024-08-09_00:52:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s - [COUNTERS] Fortran MEs ( 1 ) : 2.3175s for 8192 events => throughput is 3.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s + [COUNTERS] Fortran MEs ( 1 ) : 2.2499s for 8192 events => throughput is 3.64E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3268s - [COUNTERS] Fortran MEs ( 1 ) : 2.2203s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s + [COUNTERS] Fortran MEs ( 1 ) : 2.2759s for 8192 events => throughput is 3.60E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.2382s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7627s - [COUNTERS] Fortran MEs ( 1 ) : 24.4756s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.7017s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8086s + [COUNTERS] Fortran MEs ( 1 ) 
: 24.8931s for 90112 events => throughput is 3.62E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0170s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6185s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3985s for 8192 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4305s for 8192 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.4436s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0366s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4070s for 90112 events => throughput is 3.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.5017s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7808s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.7158s for 90112 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.563022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542884e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.568130e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530103e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.8190s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5560s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2630s for 8192 events => throughput is 6.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3441s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2634s for 8192 events => throughput is 6.48E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.9508s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9389s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.0119s for 90112 events => throughput is 6.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.9197s + [COUNTERS] Fortran Overhead ( 0 ) 
: 1.7936s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.1234s for 90112 events => throughput is 6.38E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403640e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.656588e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.805258e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.664988e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5516s for 8192 events => throughput is 1.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9116s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3446s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5653s for 8192 events => throughput is 1.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - 
[COUNTERS] PROGRAM TOTAL : 8.4794s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2982s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1812s for 90112 events => throughput is 1.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.0033s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7755s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.2261s for 90112 events => throughput is 1.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.512239e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.485686e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.514846e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.488153e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3031s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8025s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5006s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4991s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 
+401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.6774s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2371s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4403s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.2914s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7820s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5079s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.729653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.693554e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.736018e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.678028e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6302s for 8192 events => throughput is 1.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9859s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6411s for 8192 events => throughput is 1.28E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.3263s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3416s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.9846s for 90112 events => throughput is 1.29E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.8930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7934s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.0976s for 90112 events => throughput is 1.27E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.291281e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269596e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.310298e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304260e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 
events) - [COUNTERS] PROGRAM TOTAL : 0.8138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0196s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3691s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1886s for 90112 events => throughput is 4.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4031s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1884s for 90112 events => throughput is 4.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0195s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.833128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.836004e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.235176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223426e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.111711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.196129e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.417377e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.159800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.149870e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.418265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.416796e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.120081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.156718e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.756468e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.752894e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 92432a70ab..e6a1cba79b 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:42:44 +DATE: 2024-08-09_00:54:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3274s - [COUNTERS] Fortran MEs ( 1 ) : 2.2151s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6010s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s + [COUNTERS] Fortran MEs ( 1 ) : 2.2584s for 8192 events => throughput is 3.63E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 
events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3275s - [COUNTERS] Fortran MEs ( 1 ) : 2.2162s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] Fortran MEs ( 1 ) : 2.2696s for 8192 events => throughput is 3.61E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.3201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7716s - [COUNTERS] Fortran MEs ( 1 ) : 24.5485s for 90112 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.5878s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7922s + [COUNTERS] Fortran MEs ( 1 ) : 24.7956s for 90112 events => throughput is 3.63E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896784952157763E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.9557s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5931s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3626s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4000s for 8192 events => throughput is 3.41E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 7.667e-07 [7.6668138450782073E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 29.8768s - [COUNTERS] Fortran Overhead ( 0 ) : 3.9900s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.8868s for 90112 events => throughput is 3.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.1446s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7932s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3466s for 90112 events => throughput is 3.42E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615815e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577022e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588760e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.590866e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7362s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0215s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7147s for 8192 events => throughput is 1.15E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6623s for 8192 events => throughput is 1.24E+04 events/s + 
[COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.5915s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3718s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2197s for 90112 events => throughput is 1.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.0575s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7825s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2734s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287471e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265218e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287472e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265996e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6041s - [COUNTERS] CudaCpp MEs ( 2 
) : 0.2817s for 8192 events => throughput is 2.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6296s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2826s for 8192 events => throughput is 2.90E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.1146s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9843s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1303s for 90112 events => throughput is 2.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9000s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7718s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1273s for 90112 events => throughput is 2.88E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.952012e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.939784e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.978134e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.964350e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8292s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5726s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2566s for 8192 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2595s for 8192 events => throughput is 3.16E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.7825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9553s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8272s for 90112 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.6623s + [COUNTERS] Fortran Overhead ( 0 ) : 
1.7820s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8794s for 90112 events => throughput is 3.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.330203e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.263231e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.291014e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.247254e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9527s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6385s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3142s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3212s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - 
[COUNTERS] PROGRAM TOTAL : 5.5476s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0509s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4967s for 90112 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3279s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7717s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5549s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.605632e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.589261e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.608873e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.602723e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896802503195373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 
+557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668190930428073E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3385s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1681s for 90112 events => throughput is 5.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3814s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1945s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1700s for 90112 events => throughput is 5.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0169s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,37 +573,37 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.899244e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.860775e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.168943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.139558e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.330723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304686e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.347926e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344126e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.329833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335964e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.345608e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.345203e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.313833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.314317e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 7abb61d6c6..7e343e91b1 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 
BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:44:48 +DATE: 2024-08-09_00:56:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5379s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3249s - [COUNTERS] Fortran MEs ( 1 ) : 2.2131s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3434s + [COUNTERS] Fortran MEs ( 1 ) : 2.2435s for 8192 events => throughput is 3.65E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3304s - [COUNTERS] Fortran MEs ( 1 ) : 2.2201s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5935s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3429s + [COUNTERS] Fortran MEs ( 1 ) : 2.2507s for 8192 events => throughput is 3.64E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - 
[COUNTERS] PROGRAM TOTAL : 26.2622s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7530s - [COUNTERS] Fortran MEs ( 1 ) : 24.5092s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.4482s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7926s + [COUNTERS] Fortran MEs ( 1 ) : 24.6556s for 90112 events => throughput is 3.65E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0598s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6439s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4159s for 8192 events => throughput is 3.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7899s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3466s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4385s for 8192 events => throughput is 3.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.6006s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0492s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5514s for 90112 events => throughput is 3.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.6799s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7926s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.8820s for 90112 events => throughput is 3.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.474454e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507267e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.439517e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.511786e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7397s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5249s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2148s for 8192 events => throughput is 6.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.5883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3421s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2436s for 8192 events => throughput is 6.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.3722s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4452s for 90112 events => throughput is 6.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4498s + [COUNTERS] Fortran Overhead ( 0 ) 
: 1.7701s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.6770s for 90112 events => throughput is 6.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.040827e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.943689e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.981328e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925887e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5465s for 8192 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9098s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3504s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 8192 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - 
[COUNTERS] PROGRAM TOTAL : 8.3202s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2754s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0449s for 90112 events => throughput is 1.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7702s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1490s for 90112 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.536921e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.518105e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.529532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.514088e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.2826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4860s for 8192 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4873s for 8192 events => throughput is 1.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 
+401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.5114s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1898s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3216s for 90112 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1725s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7642s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4067s for 90112 events => throughput is 1.67E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.741888e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710218e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.760590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.722202e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6386s for 8192 events => throughput is 1.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9928s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6479s for 8192 events => throughput is 1.26E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.4068s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3459s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.0609s for 90112 events => throughput is 1.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.0659s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7892s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2749s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302056e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.210214e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254889e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 
events) - [COUNTERS] PROGRAM TOTAL : 0.8079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7760s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0195s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.5070s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3171s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1899s for 90112 events => throughput is 4.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1952s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1896s for 90112 events => throughput is 4.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814747e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.215347e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.187533e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.080489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.164029e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.380958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.389995e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.077544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.128645e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.380568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.372948e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.123241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.119403e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.746541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.750060e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 6e0ebf0fe6..0fe0851e40 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:06:30 +DATE: 2024-08-09_00:50:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6347s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s - [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6580s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6494s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s - [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3851s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3840s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2894s - [COUNTERS] Fortran MEs ( 1 ) : 0.0946s for 90112 events => throughput is 9.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3345s + [COUNTERS] Fortran MEs ( 1 ) : 0.0927s for 90112 events => throughput is 9.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ 
Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3567s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0886s for 90112 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4271s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3353s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0914s for 90112 events => throughput is 9.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.005595e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.006217e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012216e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.022578e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3856s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.89E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0472s for 90112 events => throughput is 1.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0489s 
for 90112 events => throughput is 1.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.909460e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897485e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.971415e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.985824e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3921s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2978s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2679s - [COUNTERS] CudaCpp MEs ( 
2 ) : 0.0299s for 90112 events => throughput is 3.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3531s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3221s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0306s for 90112 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.080442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.126014e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.074278e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.364824e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 
[0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0286s for 90112 events => throughput is 3.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3635s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.05E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.360999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.285096e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423598e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec 
to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0304s for 90112 events => throughput is 2.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3563s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3235s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 90112 events => throughput is 2.79E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.892371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.866364e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.222820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.134151e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869280] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8036s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8164s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.8152s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384401] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7080s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7028s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.75E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7576s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 90112 events => throughput is 1.72E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.783715e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730366e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.058188e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.967481e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.170829e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.198830e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.604074e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.649618e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.147861e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170218e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.994548e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.903772e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155905e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.201664e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319425e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319844e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 0993fdcc1c..5c4b04cd13 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:06:54 +DATE: 2024-08-09_00:51:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6621s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 9.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6497s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6414s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3966s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3872s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4039s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3951s + [COUNTERS] 
Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3966s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3008s - [COUNTERS] Fortran MEs ( 1 ) : 0.0958s for 90112 events => throughput is 9.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4878s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3911s + [COUNTERS] Fortran MEs ( 1 ) : 0.0967s for 90112 events => throughput is 9.32E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021439979276] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3975s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550550786874] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3681s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 
1.4264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.034265e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.021671e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.024334e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021343761686] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3741s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3905s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550488814170] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] 
PROGRAM TOTAL : 1.3081s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 90112 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3711s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0289s for 90112 events => throughput is 3.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.304389e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288372e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.380080e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432097e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' 
./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2810s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 90112 events => throughput is 4.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3432s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3229s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 90112 events => throughput is 4.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.883752e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.077269e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.254694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.403997e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3722s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3848s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 
8192 events => throughput is 4.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3493s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 90112 events => throughput is 4.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3387s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3197s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 90112 events => throughput is 4.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.263107e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.322495e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.596148e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.427973e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021917867366] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.0020s for 8192 events => throughput is 4.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3878s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.78E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098551029624061] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 90112 events => throughput is 4.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3406s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 90112 events => throughput is 4.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.410618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.424607e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.579934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.888963e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156022290359153] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8016s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8154s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098551341908548] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7042s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s 
for 90112 events => throughput is 1.85E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.004673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.032627e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352806e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.278657e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.848697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.543019e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.574226e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.578539e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.876693e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.555176e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.655828e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.658200e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.468075e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.883073e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.699037e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.705532e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index f51812e183..62624c2c92 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:07:18 +DATE: 2024-08-09_00:51:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6364s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6273s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 8.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6493s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6409s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.81E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 
events) - [COUNTERS] PROGRAM TOTAL : 0.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3884s - [COUNTERS] Fortran MEs ( 1 ) : 0.0096s for 8192 events => throughput is 8.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3909s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3113s - [COUNTERS] Fortran MEs ( 1 ) : 0.0958s for 90112 events => throughput is 9.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4133s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3208s + [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3864s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 
[0.31098557069460298] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3664s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4087s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.803105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.803386e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.833012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.910254e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3923s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.82E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec 
to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0466s for 90112 events => throughput is 1.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3175s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0474s for 90112 events => throughput is 1.90E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.966382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964224e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.002002e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028853e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3795s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3954s + [COUNTERS] Fortran Overhead ( 
0 ) : 0.3923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3017s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 90112 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.113986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.237365e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.514629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416021e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3847s - [COUNTERS] 
Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3940s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3019s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0279s for 90112 events => throughput is 3.23E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470466e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347126e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.587854e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.589308e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3978s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 90112 events => throughput is 2.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3501s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3186s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s 
for 90112 events => throughput is 2.90E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.935388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.904623e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.012886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.114835e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027194560187] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8054s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556243340819] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6952s - [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0051s for 90112 events => throughput is 1.78E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7501s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.75E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.612145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.842332e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.945612e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019027e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.146137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.214756e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.463844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.517612e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.146854e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171297e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.862724e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.740991e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.162271e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.214875e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.282910e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.310258e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 679246dd46..6131633fdd 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:40 +DATE: 2024-08-09_00:49:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7362s - [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8016s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7599s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 
0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3565s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4173s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6909s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s - [COUNTERS] Fortran MEs ( 1 ) : 0.4544s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6984s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2478s + [COUNTERS] Fortran MEs ( 1 ) : 0.4506s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4030s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 [UNWEIGHT] Wrote 
1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7824s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7366s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2536s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4825s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.924806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880754e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882930e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 8192 events => throughput is 3.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing 
' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5413s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2663s for 90112 events => throughput is 3.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2711s for 90112 events => throughput is 3.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.314362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302363e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.440069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365112e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3924s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 
events => throughput is 5.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4296s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1637s for 90112 events => throughput is 5.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4183s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 90112 events => throughput is 5.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.062177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.278183e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.382563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.374748e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s 
for 8192 events => throughput is 5.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1505s for 90112 events => throughput is 5.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3978s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.763603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.775498e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.871042e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.841522e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5325s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2948s + [COUNTERS] PROGRAM TOTAL : 1.4927s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2545s [COUNTERS] CudaCpp MEs ( 2 ) : 0.2377s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** 
(2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.502625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.798876e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.556287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.612840e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8405s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.24E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6883s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6862s 
+ [COUNTERS] Fortran Overhead ( 0 ) : 1.6788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919891e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.869432e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.630666e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.714086e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.869333e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.311155e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083882e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898237e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322734e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** 
Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.164202e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159310e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.905759e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296675e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.090826e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.098537e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 9e00b5e78a..58b86df658 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:39:07 +DATE: 2024-08-09_00:50:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,8 +58,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7412s + [COUNTERS] PROGRAM TOTAL : 0.8051s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7635s [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3540s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3740s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6850s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2308s - [COUNTERS] Fortran MEs ( 1 ) : 0.4541s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] 
PROGRAM TOTAL : 1.7188s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2615s + [COUNTERS] Fortran MEs ( 1 ) : 0.4573s for 90112 events => throughput is 1.97E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598853620719339] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3996s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577522280119403] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7388s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4454s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7041s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.042800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004528e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989674e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598849697851406] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3933s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518590213366] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4390s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2600s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 90112 events => throughput is 5.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4571s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1866s for 90112 
events => throughput is 4.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.766493e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.747967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.711541e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3932s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3838s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3503s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0942s for 
90112 events => throughput is 9.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3456s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0958s for 90112 events => throughput is 9.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.065332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.204759e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.552613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.210555e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518612400254] 
fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3419s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0890s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3394s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0908s for 90112 events => throughput is 9.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.815596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.706656e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233766e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598854350242270] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3868s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** 
@@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577522751628507] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3921s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1223s for 90112 events => throughput is 7.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1256s for 90112 events => throughput is 7.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.916519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.942843e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.893526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.910825e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598870301426373] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.59E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8078s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577527268256027] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.65E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7098s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.56E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.812781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.705094e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.214634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.269887e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.018876e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.888199e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.400207e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391800e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.038902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.898622e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.501990e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.539526e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.639781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.473018e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.489136e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.495430e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index e096eb78b5..75d0c77429 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -6,8 +6,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -18,10 +18,10 @@ make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:39:32 +DATE: 2024-08-09_00:50:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7445s - [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7796s + [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3977s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3560s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 
44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2335s - [COUNTERS] Fortran MEs ( 1 ) : 0.4566s for 90112 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2559s + [COUNTERS] Fortran MEs ( 1 ) : 0.4544s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4017s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525144126803] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7736s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4783s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7448s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4867s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.906547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.873127e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.885100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907422e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525144126810] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5337s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2626s for 90112 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5269s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2579s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2686s for 90112 
events => throughput is 3.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.321004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333942e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.457355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376975e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4292s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 
90112 events => throughput is 5.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4173s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2508s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1662s for 90112 events => throughput is 5.42E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.432194e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.335642e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.311023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.330908e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3721s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3897s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] 
fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1477s for 90112 events => throughput is 6.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4068s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2528s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1536s for 90112 events => throughput is 5.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.922477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.855366e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.992472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.947430e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** 
@@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4944s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2251s for 90112 events => throughput is 4.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4943s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2580s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.827539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733262e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.759224e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.702855e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860056955807] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8053s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8039s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.21E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +558,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] Cross section = 44.58 [44.577523872560512] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) [COUNTERS] PROGRAM TOTAL : 1.6927s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009355e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.871837e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.608692e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.622666e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.895025e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299743e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.063290e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055606e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.880064e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302003e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.140289e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.885288e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319830e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.014417e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.983678e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tput/gitdifftput.sh b/epochX/cudacpp/tput/gitdifftput.sh index 541d57ef1e..b2c183afe7 100755 --- a/epochX/cudacpp/tput/gitdifftput.sh +++ b/epochX/cudacpp/tput/gitdifftput.sh @@ -27,7 +27,7 @@ exclude2='(Entering|Leaving|Building|HASCURAND|BACKEND|USEBUILDDIR)' # Lines (interesting) which may change on different software versions exclude3='(Symbols|Avg|Relative|MeanMatrixElemValue)' # Lines (uninteresting) which change when missing some tests (no avx512, cuda, hip...) -exclude4='(runExe|cmpExe|runNcu|SIGMA|Workflow|FP|Internal|OMP|Symbols|PASSED|INFO|WARNING|PROF|\+OK|\-OK|CPU:|===|\.\.\.|\-$|\+$)' +exclude4='(runTest|runExe|cmpExe|runNcu|SIGMA|Workflow|FP|Internal|OMP|Symbols|PASSED|INFO|WARNING|PROF|\+OK|\-OK|CPU:|===|\.\.\.|\-$|\+$)' # Lines (interesting) which show that some tests are missing (no avx512, cuda, hip...) exclude5='(Not found|no avx512vl)' diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index acaa25cc7a..ad26491862 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:14:43 +DATE: 2024-08-08_19:47:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.992468e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.765141e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.187072e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.598959e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.638501e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.177835e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.671047 sec +TOTAL : 0.698559 sec INFO: No Floating Point Exceptions have been reported - 2,683,013,854 cycles # 2.970 GHz - 4,099,943,544 instructions # 1.53 insn per cycle - 0.969012880 seconds time elapsed + 2,601,897,002 cycles # 2.808 GHz + 4,040,507,104 instructions # 1.55 insn per cycle + 0.999350103 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.073867e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.251694e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251694e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.054108e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229313e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229313e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.258093 sec +TOTAL : 6.402837 sec INFO: No Floating Point Exceptions have been reported - 19,045,527,053 cycles # 3.041 GHz - 46,105,742,127 instructions # 2.42 insn per cycle - 6.263804311 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 476) (avx2: 0) (512y: 0) (512z: 0) + 19,233,855,272 cycles # 3.000 GHz + 46,180,507,769 instructions # 2.40 insn per cycle + 6.412153445 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.629061e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.125560e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.125560e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.601848e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.093713e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.093713e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.266484 sec +TOTAL : 4.363298 sec INFO: No Floating Point Exceptions have been reported - 12,926,063,866 cycles # 3.026 GHz - 31,614,643,699 instructions # 2.45 insn per cycle - 4.272247607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1676) (avx2: 0) (512y: 0) (512z: 0) + 13,100,720,322 cycles # 2.997 GHz + 31,716,075,564 instructions # 2.42 insn per cycle + 4.372588931 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1664) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.041568e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.865995e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.865995e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.042973e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.858628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.858628e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.490041 sec +TOTAL : 3.509207 sec INFO: No Floating Point Exceptions have been reported - 10,064,100,406 cycles # 2.880 GHz - 19,613,549,872 instructions # 1.95 insn per cycle - 3.496071587 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1972) (512y: 0) (512z: 0) + 10,205,028,097 cycles # 2.901 GHz + 19,707,283,623 
instructions # 1.93 insn per cycle + 3.518316321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.101707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.962511e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.962511e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.068954e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924439e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924439e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.393729 sec +TOTAL : 3.473859 sec INFO: No Floating Point Exceptions have been reported - 9,843,950,565 cycles # 2.897 GHz - 19,263,531,824 instructions # 1.96 insn per cycle - 3.399372842 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1713) (512y: 178) (512z: 0) + 10,004,130,884 cycles # 2.873 GHz + 19,357,111,804 instructions # 1.93 insn per cycle + 3.483068816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.833491e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.443330e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.443330e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.804457e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.421604e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.421604e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.832834 sec +TOTAL : 3.921670 sec INFO: No Floating Point Exceptions have been reported - 8,597,411,507 cycles # 
2.241 GHz - 15,728,652,694 instructions # 1.83 insn per cycle - 3.838260969 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 900) (512y: 156) (512z: 1257) + 8,766,336,363 cycles # 2.231 GHz + 15,830,799,810 instructions # 1.81 insn per cycle + 3.930866073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 880) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 076a1808b0..254ccc5cd6 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 
'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:49:18 +DATE: 2024-08-08_20:16:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.215489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.207119e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.207119e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.859786e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167324e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167324e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.432396 sec +TOTAL : 2.182775 sec INFO: No Floating Point Exceptions have been reported - 7,595,029,577 cycles # 2.848 GHz - 13,416,645,348 instructions # 1.77 insn per cycle - 2.736931171 seconds time elapsed + 7,222,143,773 cycles # 2.974 GHz + 12,988,458,578 instructions # 1.80 insn per cycle + 2.484589357 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -70,8 +70,10 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -89,20 +91,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.034815e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199828e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199828e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.023014e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186587e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186587e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.685334 sec +TOTAL : 6.792659 sec INFO: No Floating Point Exceptions have been reported - 20,294,656,403 cycles # 3.033 GHz - 46,336,697,418 instructions # 2.28 insn 
per cycle - 6.692388795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 476) (avx2: 0) (512y: 0) (512z: 0) + 20,463,079,955 cycles # 3.008 GHz + 46,412,955,093 instructions # 2.27 insn per cycle + 6.804041518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -118,20 +121,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.544003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.988575e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.988575e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.536442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970461e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970461e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.684901 sec +TOTAL : 4.741441 sec INFO: No Floating Point 
Exceptions have been reported - 14,246,354,655 cycles # 3.037 GHz - 32,456,861,259 instructions # 2.28 insn per cycle - 4.692010345 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1676) (avx2: 0) (512y: 0) (512z: 0) + 14,332,452,862 cycles # 3.016 GHz + 32,573,923,419 instructions # 2.27 insn per cycle + 4.753137415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -147,20 +151,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.922827e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.637793e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.637793e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.834595e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.507335e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.507335e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.896654 sec +TOTAL : 4.104610 sec INFO: No Floating Point Exceptions have been reported - 11,461,775,110 cycles # 2.937 GHz - 20,973,003,881 instructions # 1.83 insn per cycle - 3.903908173 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1972) (512y: 0) (512z: 0) + 11,547,104,567 cycles # 2.806 GHz + 21,093,610,719 instructions # 1.83 insn per cycle + 4.116807687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -176,20 +181,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.970040e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.709759e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.709759e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.917747e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.629096e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.629096e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.821641 sec +TOTAL : 3.937807 sec INFO: No Floating Point Exceptions have been reported - 11,209,928,511 cycles # 2.929 GHz - 20,622,007,360 instructions # 1.84 insn per cycle - 3.828782847 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1713) (512y: 178) (512z: 0) + 11,279,300,088 cycles # 2.856 GHz + 20,732,054,777 instructions # 1.84 insn per cycle + 3.949582750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -205,20 +211,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.674013e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.181228e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.181228e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.634373e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.159831e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.159831e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.396074 sec +TOTAL : 4.550735 sec INFO: No Floating Point Exceptions have been reported - 10,006,474,405 cycles # 2.275 GHz - 16,876,699,682 instructions # 1.69 insn per cycle - 4.403473618 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 900) (512y: 156) (512z: 1257) + 10,336,377,696 cycles # 2.266 GHz + 17,023,763,380 instructions # 1.65 insn per cycle + 4.562764893 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 880) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 107d3d6a6a..a17dc8d37a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_21:00:12 +DATE: 2024-08-08_20:28:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.812093e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.683603e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.123683e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.117423e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.844085e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.131938e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.372435 sec +TOTAL : 1.358559 sec INFO: No Floating Point Exceptions have been reported - 4,633,412,607 cycles # 2.869 GHz - 7,070,218,708 instructions # 1.53 insn per cycle - 1.671081959 seconds time elapsed + 4,616,681,568 cycles # 2.947 GHz + 7,101,035,160 instructions # 1.54 insn per cycle + 1.643879361 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.056221e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231517e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.231517e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.047167e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.219441e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.219441e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.732250 sec +TOTAL : 6.877625 sec INFO: No Floating Point Exceptions have been reported - 20,212,821,842 cycles # 3.004 GHz - 46,213,328,831 instructions # 2.29 insn per cycle - 6.737705322 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 476) (avx2: 0) (512y: 0) (512z: 0) + 20,474,853,896 cycles # 2.975 GHz + 46,476,031,399 instructions # 2.27 insn per cycle + 6.883195189 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.624027e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.119257e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.119257e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.613543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.104302e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.104302e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.643508 sec +TOTAL : 4.762997 sec INFO: No Floating Point Exceptions have been reported - 14,061,889,539 cycles # 3.025 GHz - 31,617,431,963 instructions # 2.25 insn per cycle - 4.649295615 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1676) (avx2: 0) (512y: 0) (512z: 0) + 14,341,567,999 cycles # 3.008 GHz + 31,906,796,447 instructions # 2.22 insn per cycle + 4.768768263 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1664) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.052114e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.869806e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.869806e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.037523e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.848398e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.848398e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.828551 sec +TOTAL : 3.928675 sec INFO: No Floating Point Exceptions have been reported - 11,213,173,484 cycles # 2.925 GHz - 19,515,414,655 instructions # 1.74 insn per cycle - 3.833932438 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1972) (512y: 0) (512z: 0) + 11,431,967,131 cycles # 2.907 GHz + 19,749,163,356 
instructions # 1.73 insn per cycle + 3.934544865 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.111125e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.978452e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.978452e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.057561e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.903205e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.903205e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.744899 sec +TOTAL : 3.914555 sec INFO: No Floating Point Exceptions have been reported - 10,990,201,092 cycles # 2.931 GHz - 18,964,501,195 instructions # 1.73 insn per cycle - 3.750289687 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1713) (512y: 178) (512z: 0) + 11,301,789,336 cycles # 2.884 GHz + 19,198,978,685 instructions # 1.70 insn per cycle + 3.919932247 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.836834e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.450918e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.450918e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.792077e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.384424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.384424e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.193324 sec +TOTAL : 4.355139 sec INFO: No Floating Point Exceptions have been reported - 9,768,736,605 cycles # 
2.327 GHz - 15,431,475,618 instructions # 1.58 insn per cycle - 4.198624459 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 900) (512y: 156) (512z: 1257) + 9,975,675,333 cycles # 2.288 GHz + 15,643,574,075 instructions # 1.57 insn per cycle + 4.360684158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 880) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 6fb775969f..02f69b4d1c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 
'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:57:32 +DATE: 2024-08-08_20:25:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.817933e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.722429e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.176386e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.161167e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.790408e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.166295e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.968188 sec +TOTAL : 0.968841 sec INFO: No Floating Point Exceptions have been reported - 3,589,395,127 cycles # 2.997 GHz - 7,175,672,002 instructions # 2.00 insn per cycle - 1.254215961 seconds time elapsed + 3,539,663,050 cycles # 2.958 GHz + 6,992,486,553 instructions # 1.98 insn per cycle + 1.255291189 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.079632e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.258365e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.258365e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.054864e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.230420e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.230420e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.223806 sec +TOTAL : 6.368076 sec INFO: No Floating Point Exceptions have been reported - 19,058,925,167 cycles # 3.060 GHz - 46,108,548,764 instructions # 2.42 insn per cycle - 6.229330377 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 476) (avx2: 0) (512y: 0) (512z: 0) + 19,096,334,706 cycles # 2.997 GHz + 46,076,716,123 instructions # 2.41 insn per cycle + 6.373662191 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.601349e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.088576e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.088576e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.601324e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.083048e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.083048e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.339936 sec +TOTAL : 4.335443 sec INFO: No Floating Point Exceptions have been reported - 12,970,405,515 cycles # 2.986 GHz - 31,616,392,768 instructions # 2.44 insn per cycle - 4.345471916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1676) (avx2: 0) (512y: 0) (512z: 0) + 12,960,942,150 cycles # 2.986 GHz + 31,610,247,350 instructions # 2.44 insn per cycle + 4.340962885 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1664) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.072548e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.896235e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.896235e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.037265e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.842019e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.842019e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.433513 sec +TOTAL : 3.487108 sec INFO: No Floating Point Exceptions have been reported - 10,074,347,286 cycles # 2.931 GHz - 19,614,838,060 instructions # 1.95 insn per cycle - 3.438916068 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1972) (512y: 0) (512z: 0) + 10,064,000,379 cycles # 2.882 GHz + 19,599,635,012 
instructions # 1.95 insn per cycle + 3.492608891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.120381e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.995544e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.995544e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.083703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.929723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.929723e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.362383 sec +TOTAL : 3.417011 sec INFO: No Floating Point Exceptions have been reported - 9,846,721,407 cycles # 2.924 GHz - 19,261,667,804 instructions # 1.96 insn per cycle - 3.367866307 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1713) (512y: 178) (512z: 0) + 9,860,886,386 cycles # 2.882 GHz + 19,261,098,945 instructions # 1.95 insn per cycle + 3.422241820 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.838612e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.456006e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.456006e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.806629e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.401308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.401308e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.825489 sec +TOTAL : 3.881256 sec INFO: No Floating Point Exceptions have been reported - 8,628,909,528 cycles # 
2.253 GHz - 15,727,742,566 instructions # 1.82 insn per cycle - 3.831075373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 900) (512y: 156) (512z: 1257) + 8,602,524,027 cycles # 2.214 GHz + 15,722,205,670 instructions # 1.83 insn per cycle + 3.886723200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 880) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index bdcfd1fbcb..35f9b1d01f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 
'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:54:51 +DATE: 2024-08-08_20:22:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,22 +50,24 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.161971e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.654112e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.070165e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.201911e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.800503e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.039847e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.855886 sec +TOTAL : 1.856881 sec INFO: No Floating Point Exceptions have been reported - 6,276,149,380 cycles # 3.000 GHz - 11,476,884,450 instructions # 1.83 insn per cycle - 2.148848830 seconds time elapsed + 6,224,640,386 cycles # 2.971 GHz + 11,427,865,713 instructions # 1.84 insn per cycle + 2.153600888 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -82,20 +84,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.066433e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.245773e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.245773e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.044821e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.217145e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.217145e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.298786 sec +TOTAL : 6.426882 sec INFO: No Floating Point Exceptions have been reported - 19,098,852,731 cycles # 3.030 GHz - 46,107,987,765 instructions # 2.41 insn per 
cycle - 6.304281104 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 476) (avx2: 0) (512y: 0) (512z: 0) + 19,111,682,358 cycles # 2.975 GHz + 46,077,003,649 instructions # 2.41 insn per cycle + 6.432401292 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.595423e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.081086e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.081086e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.618749e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.109823e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.109823e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.355941 sec +TOTAL : 4.289727 sec INFO: No Floating Point 
Exceptions have been reported - 12,963,236,746 cycles # 2.973 GHz - 31,615,561,135 instructions # 2.44 insn per cycle - 4.361261638 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1676) (avx2: 0) (512y: 0) (512z: 0) + 12,954,885,068 cycles # 3.017 GHz + 31,610,318,935 instructions # 2.44 insn per cycle + 4.295110036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1664) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -138,20 +142,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.008621e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.798575e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.798575e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.027068e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.831891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.831891e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.540183 sec +TOTAL : 3.501218 sec INFO: No Floating Point Exceptions have been reported - 10,084,623,628 cycles # 2.845 GHz - 19,614,025,069 instructions # 1.94 insn per cycle - 3.545761412 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1972) (512y: 0) (512z: 0) + 10,084,953,651 cycles # 2.877 GHz + 19,599,538,271 instructions # 1.94 insn per cycle + 3.506570863 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +171,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.116739e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.991229e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.991229e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.095436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.953376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.953376e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.368422 sec +TOTAL : 3.399978 sec INFO: No Floating Point Exceptions have been reported - 9,858,689,282 cycles # 2.923 GHz - 19,261,795,866 instructions # 1.95 insn per cycle - 3.373773154 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1713) (512y: 178) (512z: 0) + 9,825,140,072 cycles # 2.886 GHz + 19,248,188,821 instructions # 1.96 insn per cycle + 3.405318176 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1685) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -194,20 +200,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.845230e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.462099e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.462099e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.764156e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.337626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.337626e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.809187 sec +TOTAL : 3.973951 sec INFO: No Floating Point Exceptions have been reported - 8,619,862,123 cycles # 2.260 GHz - 15,729,443,486 instructions # 1.82 insn per cycle - 3.814723478 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 900) (512y: 156) (512z: 1257) + 8,632,225,098 cycles # 2.170 GHz + 15,724,542,893 instructions # 1.82 insn per cycle + 3.979226146 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 880) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index a7f2977f4a..30013486b3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:15:13 +DATE: 2024-08-08_19:48:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.219099e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.898314e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.236850e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.631857e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952875e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.229430e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.660344 sec +TOTAL : 0.661237 sec INFO: No Floating Point Exceptions have been reported - 2,665,657,651 cycles # 2.984 GHz - 4,166,061,268 instructions # 1.56 insn per cycle - 0.952670294 seconds time elapsed + 2,635,614,506 cycles # 2.952 GHz + 4,105,447,914 instructions # 1.56 insn per cycle + 0.952322039 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.068718e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.244728e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.244728e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.051765e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.227570e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.227570e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.286281 sec +TOTAL : 6.414969 sec INFO: No Floating Point Exceptions have been reported - 19,185,891,095 cycles # 3.050 GHz - 46,066,815,570 instructions # 2.40 insn per cycle - 6.291973339 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 465) (avx2: 0) (512y: 0) (512z: 0) + 19,212,287,097 cycles # 2.991 GHz + 46,135,858,785 instructions # 2.40 insn per cycle + 6.423899634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.628951e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.130065e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.130065e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.601077e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.094081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.094081e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.270966 sec +TOTAL : 4.367872 sec INFO: No Floating Point Exceptions have been reported - 12,944,360,063 cycles # 3.028 GHz - 31,588,001,932 instructions # 2.44 insn per cycle - 4.276321086 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) + 13,124,994,280 cycles # 3.000 GHz + 31,690,002,602 instructions # 2.41 insn per cycle + 4.377128729 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1650) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.045031e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.858625e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.858625e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.022628e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826530e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826530e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.478567 sec +TOTAL : 3.545071 sec INFO: No Floating Point Exceptions have been reported - 10,055,767,156 cycles # 2.887 GHz - 19,593,455,838 instructions # 1.95 insn per cycle - 3.484108783 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1955) (512y: 0) (512z: 0) + 10,210,134,759 cycles # 2.873 GHz + 19,686,352,650 
instructions # 1.93 insn per cycle + 3.554081422 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.065932e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.899247e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.899247e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.045349e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.884198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.884198e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.447809 sec +TOTAL : 3.513122 sec INFO: No Floating Point Exceptions have been reported - 9,836,323,224 cycles # 2.849 GHz - 19,277,262,833 instructions # 1.96 insn per cycle - 3.453628682 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 178) (512z: 0) + 10,000,248,812 cycles # 2.840 GHz + 19,370,551,089 instructions # 1.94 insn per cycle + 3.521931882 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1670) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.866203e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.502282e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.502282e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.856445e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.503167e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.503167e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.770476 sec +TOTAL : 3.821454 sec INFO: No Floating Point Exceptions have been reported - 8,464,276,197 cycles # 
2.242 GHz - 15,597,917,914 instructions # 1.84 insn per cycle - 3.775924795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1237) + 8,619,394,582 cycles # 2.251 GHz + 15,699,269,615 instructions # 1.82 insn per cycle + 3.830496732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 866) (512y: 156) (512z: 1237) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 8eeaf4b96f..012009e54a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:40:08 +DATE: 2024-08-08_20:07:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.511337e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.610262e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.162427e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.604046e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.930880e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.176471e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.682277 sec +TOTAL : 0.659931 sec INFO: No Floating Point Exceptions have been reported - 2,714,625,205 cycles # 2.961 GHz - 4,224,591,836 instructions # 1.56 insn per cycle - 0.976598730 seconds time elapsed + 2,627,383,079 cycles # 2.945 GHz + 4,093,880,816 instructions # 1.56 insn per cycle + 0.951439392 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.674677e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.150026e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.150026e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.646087e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.119341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.119341e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.159141 sec +TOTAL : 4.251791 sec INFO: No Floating Point Exceptions have been reported - 12,686,441,916 cycles # 3.047 GHz - 32,456,476,690 instructions # 2.56 insn per cycle - 4.164732578 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 294) (avx2: 0) (512y: 0) (512z: 0) + 12,834,346,286 cycles # 3.012 GHz + 32,589,275,830 instructions # 2.54 insn per cycle + 4.261338656 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.114486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.024182e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.024182e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.060473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.955935e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.955935e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.379506 sec +TOTAL : 3.488809 sec INFO: No Floating Point Exceptions have been reported - 10,290,063,880 cycles # 3.041 GHz - 24,598,812,176 instructions # 2.39 insn per cycle - 3.384861278 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1263) (avx2: 0) (512y: 0) (512z: 0) + 10,533,405,751 cycles # 3.012 GHz + 24,716,100,998 instructions # 2.35 insn per cycle + 3.498417147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1251) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.309413e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.377046e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.377046e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.261794e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.343751e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343751e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.121396 sec +TOTAL : 3.211208 sec INFO: No Floating Point Exceptions have been reported - 9,147,892,087 cycles # 2.927 GHz - 16,921,916,652 instructions # 1.85 insn per cycle - 3.126771565 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1630) (512y: 0) (512z: 0) + 9,296,707,178 cycles # 2.887 GHz + 17,025,233,631 
instructions # 1.83 insn per cycle + 3.220709148 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1608) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.371372e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510492e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.510492e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.333155e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.462746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.462746e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.049635 sec +TOTAL : 3.127002 sec INFO: No Floating Point Exceptions have been reported - 8,903,759,047 cycles # 2.915 GHz - 16,334,428,688 instructions # 1.83 insn per cycle - 3.055129338 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) + 9,070,042,536 cycles # 2.893 GHz + 16,440,168,447 instructions # 1.81 insn per cycle + 3.136632933 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1344) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.046677e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.842152e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.842152e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.025516e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.816401e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.816401e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.475555 sec +TOTAL : 3.537864 sec INFO: No Floating Point Exceptions have been reported - 7,902,906,453 cycles # 
2.271 GHz - 14,570,187,590 instructions # 1.84 insn per cycle - 3.480874373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1012) (512y: 158) (512z: 954) + 8,060,468,675 cycles # 2.273 GHz + 14,674,271,295 instructions # 1.82 insn per cycle + 3.547452410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 990) (512y: 158) (512z: 954) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index bed7cd9b36..6698342434 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:40:34 +DATE: 2024-08-08_20:07:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.514355e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.633683e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.214359e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.562157e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.979811e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.228825e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.680593 sec +TOTAL : 0.660029 sec INFO: No Floating Point Exceptions have been reported - 2,706,915,647 cycles # 2.956 GHz - 4,190,146,936 instructions # 1.55 insn per cycle - 0.976111914 seconds time elapsed + 2,629,191,587 cycles # 2.942 GHz + 4,053,968,750 instructions # 1.54 insn per cycle + 0.953306046 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.165926e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.047190e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.047190e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.156529e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.042455e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.042455e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.302787 sec +TOTAL : 3.343977 sec INFO: No Floating Point Exceptions have been reported - 10,019,502,432 cycles # 3.029 GHz - 25,412,419,372 instructions # 2.54 insn per cycle - 3.308288779 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) + 10,082,768,824 cycles # 3.008 GHz + 25,523,612,333 instructions # 2.53 insn per cycle + 3.352820230 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.445937e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.787045e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.787045e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.385757e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.677774e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.677774e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.976079 sec +TOTAL : 3.073965 sec INFO: No Floating Point Exceptions have been reported - 9,030,679,969 cycles # 3.030 GHz - 21,406,976,056 instructions # 2.37 insn per cycle - 2.981503418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2: 0) (512y: 0) (512z: 0) + 9,151,066,373 cycles # 2.969 GHz + 21,519,389,474 instructions # 2.35 insn per cycle + 3.083295145 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2: 
0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.454857e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.711643e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.711643e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.361878e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.558423e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.558423e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.961656 sec +TOTAL : 3.100634 sec INFO: No Floating Point Exceptions have been reported - 8,673,614,407 cycles # 2.924 GHz - 15,870,802,072 instructions # 1.83 insn per cycle - 2.967121740 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1503) (512y: 0) (512z: 0) + 8,837,735,013 cycles # 2.843 GHz + 15,972,170,074 instructions # 
1.81 insn per cycle + 3.110024553 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1481) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.517492e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.840768e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.840768e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.456785e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.751546e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.751546e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.896325 sec +TOTAL : 2.990911 sec INFO: No Floating Point Exceptions have been reported - 8,488,467,643 cycles # 2.926 GHz - 15,590,785,507 instructions # 1.84 insn per cycle - 2.901737452 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 1282) (512y: 141) (512z: 0) + 8,652,752,906 cycles # 2.885 GHz + 15,679,245,875 instructions # 1.81 insn per cycle + 3.000632003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1256) (512y: 141) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.182346e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.108553e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.108553e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.146098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.052577e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.052577e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.281506 sec +TOTAL : 3.361559 sec INFO: No Floating Point Exceptions have been reported - 7,603,569,525 cycles # 2.314 GHz - 
14,280,278,230 instructions # 1.88 insn per cycle - 3.286945297 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 876) + 7,684,713,240 cycles # 2.281 GHz + 14,381,480,169 instructions # 1.87 insn per cycle + 3.370756572 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1019) (512y: 164) (512z: 876) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 7cc0b5502b..7cb0226a73 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:15:43 +DATE: 2024-08-08_19:48:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.368793e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204395e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.149884e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.527020e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.262134e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.154425e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.572378 sec +TOTAL : 0.568660 sec INFO: No Floating Point Exceptions have been reported - 2,355,843,554 cycles # 2.958 GHz - 3,660,983,048 instructions # 1.55 insn per cycle - 0.854414572 seconds time elapsed + 2,313,614,099 cycles # 2.926 GHz + 3,562,444,599 instructions # 1.54 insn per cycle + 0.849201094 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.112502e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.313461e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.313461e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.093483e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.290231e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.290231e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.013760 sec +TOTAL : 6.128520 sec INFO: No Floating Point Exceptions have been reported - 18,243,635,947 cycles # 3.031 GHz - 45,004,183,150 instructions # 2.47 insn per cycle - 6.018991847 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 424) (avx2: 0) (512y: 0) (512z: 0) + 18,358,884,229 cycles # 2.993 GHz + 45,043,610,227 instructions # 2.45 insn per cycle + 6.135113438 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.304371e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.534496e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.534496e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.301890e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.520762e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520762e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.091758 sec +TOTAL : 3.110805 sec INFO: No Floating Point Exceptions have been reported - 9,346,891,298 cycles # 3.019 GHz - 22,293,361,334 instructions # 2.39 insn per cycle - 3.097066950 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1969) (avx2: 0) (512y: 0) (512z: 0) + 9,366,787,669 cycles # 3.005 GHz + 22,330,309,821 instructions # 2.38 insn per cycle + 3.117673303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2: 
0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.502796e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.831762e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.831762e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.473210e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807312e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807312e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.873268 sec +TOTAL : 2.917892 sec INFO: No Floating Point Exceptions have been reported - 8,375,296,851 cycles # 2.910 GHz - 15,755,495,593 instructions # 1.88 insn per cycle - 2.878533702 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 0) (512z: 0) + 8,504,359,827 cycles # 2.909 GHz + 15,788,659,527 instructions # 1.86 
insn per cycle + 2.924742872 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.546067e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.922691e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.922691e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.503770e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.901448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.901448e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.827933 sec +TOTAL : 2.886577 sec INFO: No Floating Point Exceptions have been reported - 8,252,760,214 cycles # 2.914 GHz - 15,614,570,874 instructions # 1.89 insn per cycle - 2.833212419 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2516) (512y: 12) (512z: 0) + 8,412,391,431 cycles # 2.908 GHz + 15,643,654,257 instructions # 1.86 insn per cycle + 2.893387724 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2500) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.539602e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.940166e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.940166e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.563180e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.953888e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.953888e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.842362 sec +TOTAL : 2.828437 sec INFO: No Floating Point Exceptions have been reported - 6,678,672,318 cycles # 2.346 GHz - 12,862,803,929 
instructions # 1.93 insn per cycle - 2.847578540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,692,094,866 cycles # 2.362 GHz + 12,901,049,888 instructions # 1.93 insn per cycle + 2.834887138 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1728) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 7685e81166..e0350b6b37 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:49:52 +DATE: 2024-08-08_20:17:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.311756e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.058142e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.058142e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.473571e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.655207e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.655207e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.675029 sec +TOTAL : 1.648294 sec INFO: No Floating Point Exceptions have been reported - 5,729,315,825 cycles # 2.994 GHz - 10,322,159,846 instructions # 1.80 insn per cycle - 1.971186526 seconds time elapsed + 5,601,516,010 cycles # 2.985 GHz + 10,167,612,404 instructions # 1.82 insn per cycle + 1.933877739 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -70,8 +70,10 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -89,20 +91,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.083960e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.275861e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275861e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.085388e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.276616e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276616e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.271203 sec +TOTAL : 6.267894 sec INFO: No Floating Point Exceptions have been reported - 18,953,360,500 cycles # 3.020 GHz - 45,156,793,477 instructions # 2.38 insn 
per cycle - 6.277920644 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 424) (avx2: 0) (512y: 0) (512z: 0) + 18,908,429,443 cycles # 3.015 GHz + 45,146,579,440 instructions # 2.39 insn per cycle + 6.274110345 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -118,20 +121,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.201354e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.285456e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.285456e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.203296e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.287244e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.287244e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.357849 sec +TOTAL : 3.346534 sec INFO: No Floating Point 
Exceptions have been reported - 10,064,004,997 cycles # 2.992 GHz - 23,628,789,623 instructions # 2.35 insn per cycle - 3.364700820 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1969) (avx2: 0) (512y: 0) (512z: 0) + 10,054,217,163 cycles # 3.000 GHz + 23,624,196,038 instructions # 2.35 insn per cycle + 3.352720761 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -147,20 +151,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.368809e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.551932e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.551932e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.355349e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.546206e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.546206e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.146808 sec +TOTAL : 3.162857 sec INFO: No Floating Point Exceptions have been reported - 9,223,382,458 cycles # 2.927 GHz - 16,875,617,789 instructions # 1.83 insn per cycle - 3.153415409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 0) (512z: 0) + 9,188,398,792 cycles # 2.900 GHz + 16,865,170,162 instructions # 1.84 insn per cycle + 3.169069798 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -176,20 +181,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.410709e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.657033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.657033e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.385264e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.627916e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.627916e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.101188 sec +TOTAL : 3.125444 sec INFO: No Floating Point Exceptions have been reported - 9,095,421,618 cycles # 2.931 GHz - 16,730,461,761 instructions # 1.84 insn per cycle - 3.108136976 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2516) (512y: 12) (512z: 0) + 9,070,498,443 cycles # 2.897 GHz + 16,723,535,304 instructions # 1.84 insn per cycle + 3.131626525 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2500) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -205,20 +211,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.442658e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.667781e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.667781e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.403637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.591618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.591618e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.065007 sec +TOTAL : 3.114765 sec INFO: No Floating Point Exceptions have been reported - 7,435,897,800 cycles # 2.422 GHz - 14,069,390,876 instructions # 1.89 insn per cycle - 3.071736369 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 7,403,928,752 cycles # 2.373 GHz + 14,061,923,411 instructions # 1.90 insn per cycle + 3.121062730 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1728) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index e48bdabd24..134d5790db 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_21:00:44 +DATE: 2024-08-08_20:28:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.330774e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178373e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.122775e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.369933e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.192240e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.130758e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.177048 sec +TOTAL : 1.177651 sec INFO: No Floating Point Exceptions have been reported - 4,161,772,250 cycles # 2.977 GHz - 6,642,618,775 instructions # 1.60 insn per cycle - 1.455092421 seconds time elapsed + 4,159,647,361 cycles # 2.974 GHz + 6,655,919,197 instructions # 1.60 insn per cycle + 1.454885517 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.114730e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316689e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.106596e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.306356e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.306356e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.329097 sec +TOTAL : 6.378232 sec INFO: No Floating Point Exceptions have been reported - 19,286,025,200 cycles # 3.045 GHz - 45,186,488,992 instructions # 2.34 insn per cycle - 6.334387640 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 424) (avx2: 0) (512y: 0) (512z: 0) + 19,274,317,116 cycles # 3.020 GHz + 45,182,791,116 instructions # 2.34 insn per cycle + 6.383426426 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.315547e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.562028e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.562028e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.314732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.536945e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536945e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.414865 sec +TOTAL : 3.415254 sec INFO: No Floating Point Exceptions have been reported - 10,356,902,074 cycles # 3.029 GHz - 22,374,508,098 instructions # 2.16 insn per cycle - 3.420105986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1969) (avx2: 0) (512y: 0) (512z: 0) + 10,316,548,749 cycles # 3.017 GHz + 22,369,828,182 instructions # 2.17 insn per cycle + 3.420542694 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1957) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.458189e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.760875e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.760875e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.440596e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.750420e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.750420e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.251788 sec +TOTAL : 3.274423 sec INFO: No Floating Point Exceptions have been reported - 9,410,320,620 cycles # 2.890 GHz - 15,667,209,118 instructions # 1.66 insn per cycle - 3.257075709 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 0) (512z: 0) + 9,443,732,115 cycles # 2.881 GHz + 15,660,089,896 instructions 
# 1.66 insn per cycle + 3.279649935 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.506691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.885597e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.885597e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.490204e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.861466e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.861466e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.208545 sec +TOTAL : 3.226764 sec INFO: No Floating Point Exceptions have been reported - 9,330,497,019 cycles # 2.904 GHz - 15,326,133,128 instructions # 1.64 insn per cycle - 3.213770933 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2516) (512y: 12) (512z: 0) + 9,373,690,310 cycles # 2.901 GHz + 15,311,292,063 instructions # 1.63 insn per cycle + 3.231783686 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2500) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.572156e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.955274e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.955274e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.539604e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.891988e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.891988e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.139046 sec +TOTAL : 3.181070 sec INFO: No Floating Point Exceptions have been reported - 7,661,751,104 cycles # 2.437 GHz - 
12,572,532,310 instructions # 1.64 insn per cycle - 3.144282241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 7,641,722,393 cycles # 2.399 GHz + 12,564,622,024 instructions # 1.64 insn per cycle + 3.186357864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1728) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 97c35a5219..88892aa3af 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:58:02 +DATE: 2024-08-08_20:25:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.318335e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.194026e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.156430e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.382651e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.206198e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156880e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.844316 sec +TOTAL : 0.845509 sec INFO: No Floating Point Exceptions have been reported - 3,204,326,437 cycles # 3.008 GHz - 6,522,971,608 instructions # 2.04 insn per cycle - 1.121979744 seconds time elapsed + 3,157,288,524 cycles # 2.956 GHz + 6,452,716,967 instructions # 2.04 insn per cycle + 1.124028974 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.098591e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.298491e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.298491e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.102313e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.299140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.299140e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.087920 sec +TOTAL : 6.067726 sec INFO: No Floating Point Exceptions have been reported - 18,281,240,224 cycles # 3.001 GHz - 45,004,706,136 instructions # 2.46 insn per cycle - 6.093197293 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 424) (avx2: 0) (512y: 0) (512z: 0) + 18,241,926,835 cycles # 3.004 GHz + 44,997,190,895 instructions # 2.47 insn per cycle + 6.073021817 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.344868e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.585602e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.585602e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.262484e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.452586e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.452586e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.044494 sec +TOTAL : 3.153640 sec INFO: No Floating Point Exceptions have been reported - 9,316,186,672 cycles # 3.055 GHz - 22,293,378,605 instructions # 2.39 insn per cycle - 3.049801016 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1969) (avx2: 0) (512y: 0) (512z: 0) + 9,294,014,762 cycles # 2.943 GHz + 22,288,953,735 instructions # 2.40 insn per cycle + 3.158807454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2: 
0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.511279e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.866100e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.866100e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.393307e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.660811e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660811e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.864030 sec +TOTAL : 3.002727 sec INFO: No Floating Point Exceptions have been reported - 8,395,420,887 cycles # 2.927 GHz - 15,755,249,273 instructions # 1.88 insn per cycle - 2.869286108 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 0) (512z: 0) + 8,431,789,445 cycles # 2.804 GHz + 15,745,619,364 instructions # 1.87 
insn per cycle + 3.007966059 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.550336e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.966675e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.966675e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.401412e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.704220e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.704220e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.820910 sec +TOTAL : 2.993880 sec INFO: No Floating Point Exceptions have been reported - 8,300,103,012 cycles # 2.938 GHz - 15,610,356,251 instructions # 1.88 insn per cycle - 2.826135977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2516) (512y: 12) (512z: 0) + 8,307,647,714 cycles # 2.771 GHz + 15,598,428,137 instructions # 1.88 insn per cycle + 2.998876053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2500) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.570643e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.939319e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.939319e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.569189e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.940564e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.940564e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.807069 sec +TOTAL : 2.807856 sec INFO: No Floating Point Exceptions have been reported - 6,622,447,715 cycles # 2.355 GHz - 12,862,667,527 
instructions # 1.94 insn per cycle - 2.812373511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,608,078,812 cycles # 2.350 GHz + 12,854,592,970 instructions # 1.95 insn per cycle + 2.812995127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1728) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index ef231365e8..9b85e8bca9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:55:22 +DATE: 2024-08-08_20:23:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,22 +50,24 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.303769e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.151846e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.015925e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.140303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190749e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.050049e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.455812 sec +TOTAL : 1.475514 sec INFO: No Floating Point Exceptions have been reported - 5,035,703,348 cycles # 2.998 GHz - 9,233,292,656 instructions # 1.83 insn per cycle - 1.735608374 seconds time elapsed + 5,002,845,340 cycles # 2.948 GHz + 9,174,343,943 instructions # 1.83 insn per cycle + 1.753614320 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -82,20 +84,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.111353e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.309700e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.309700e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.100425e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302255e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302255e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.015558 sec +TOTAL : 6.083427 sec INFO: No Floating Point Exceptions have been reported - 18,261,729,705 cycles # 3.034 GHz - 45,005,382,332 instructions # 2.46 insn per 
cycle - 6.020795485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 424) (avx2: 0) (512y: 0) (512z: 0) + 18,286,986,421 cycles # 3.004 GHz + 44,997,971,916 instructions # 2.46 insn per cycle + 6.088650881 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.356374e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.602029e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.602029e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.314534e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.542028e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542028e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.032852 sec +TOTAL : 3.081783 sec INFO: No Floating Point 
Exceptions have been reported - 9,274,537,984 cycles # 3.053 GHz - 22,293,508,856 instructions # 2.40 insn per cycle - 3.038261520 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1969) (avx2: 0) (512y: 0) (512z: 0) + 9,321,092,178 cycles # 3.020 GHz + 22,287,543,522 instructions # 2.39 insn per cycle + 3.087086590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1957) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -138,20 +142,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.521232e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.881344e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.881344e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.473883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.791063e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.791063e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.849181 sec +TOTAL : 2.904887 sec INFO: No Floating Point Exceptions have been reported - 8,409,015,977 cycles # 2.947 GHz - 15,756,716,712 instructions # 1.87 insn per cycle - 2.854559012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2608) (512y: 0) (512z: 0) + 8,410,533,055 cycles # 2.892 GHz + 15,745,298,993 instructions # 1.87 insn per cycle + 2.910034115 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +171,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.555493e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.967215e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.967215e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.505951e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.882287e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.817657 sec +TOTAL : 2.874716 sec INFO: No Floating Point Exceptions have been reported - 8,284,280,666 cycles # 2.936 GHz - 15,609,086,887 instructions # 1.88 insn per cycle - 2.822871000 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2516) (512y: 12) (512z: 0) + 8,289,781,145 cycles # 2.880 GHz + 15,603,340,875 instructions # 1.88 insn per cycle + 2.879926744 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2500) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -194,20 +200,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.591429e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.994264e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.994264e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.541059e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.907885e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.907885e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.786817 sec +TOTAL : 2.838024 sec INFO: No Floating Point Exceptions have been reported - 6,634,065,696 cycles # 2.377 GHz - 12,862,560,130 instructions # 1.94 insn per cycle - 2.792188382 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1736) (512y: 17) (512z: 1439) + 6,642,493,654 cycles # 2.337 GHz + 12,855,006,533 instructions # 1.94 insn per cycle + 2.843273121 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1728) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index b94fdd94e9..1d6c5eac35 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:16:09 +DATE: 2024-08-08_19:49:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.384600e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.250346e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.207785e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538728e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270981e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213583e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.570828 sec +TOTAL : 0.564431 sec INFO: No Floating Point Exceptions have been reported - 2,356,568,658 cycles # 2.967 GHz - 3,666,157,119 instructions # 1.56 insn per cycle - 0.851031757 seconds time elapsed + 2,335,295,476 cycles # 2.965 GHz + 3,628,047,058 instructions # 1.55 insn per cycle + 0.844723791 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.109464e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.310084e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.310084e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.105961e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.305064e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.305064e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.027157 sec +TOTAL : 6.061656 sec INFO: No Floating Point Exceptions have been reported - 18,232,427,029 cycles # 3.023 GHz - 44,978,661,516 instructions # 2.47 insn per cycle - 6.032361273 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 410) (avx2: 0) (512y: 0) (512z: 0) + 18,285,648,193 cycles # 3.014 GHz + 45,012,181,796 instructions # 2.46 insn per cycle + 6.068344943 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.329924e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.545800e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.545800e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.291804e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.489005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489005e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.061730 sec +TOTAL : 3.124490 sec INFO: No Floating Point Exceptions have been reported - 9,324,636,350 cycles # 3.041 GHz - 22,261,175,312 instructions # 2.39 insn per cycle - 3.067057970 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1952) (avx2: 0) (512y: 0) (512z: 0) + 9,410,134,292 cycles # 3.006 GHz + 22,303,224,878 instructions # 2.37 insn per cycle + 3.131481201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1940) (avx2: 
0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.485254e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.804597e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.804597e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.475997e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.815316e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.815316e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.895885 sec +TOTAL : 2.909295 sec INFO: No Floating Point Exceptions have been reported - 8,404,314,376 cycles # 2.898 GHz - 15,749,899,686 instructions # 1.87 insn per cycle - 2.901068192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2583) (512y: 0) (512z: 0) + 8,493,085,415 cycles # 2.913 GHz + 15,781,425,735 instructions # 1.86 
insn per cycle + 2.916002973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2570) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.546580e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.928183e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.928183e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.513335e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.913286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.913286e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.826440 sec +TOTAL : 2.878218 sec INFO: No Floating Point Exceptions have been reported - 8,267,422,079 cycles # 2.921 GHz - 15,597,882,535 instructions # 1.89 insn per cycle - 2.831511381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2485) (512y: 12) (512z: 0) + 8,394,171,701 cycles # 2.911 GHz + 15,627,283,272 instructions # 1.86 insn per cycle + 2.884835196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2469) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.588361e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.981784e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.981784e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.564665e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.956343e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.956343e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.789975 sec +TOTAL : 2.826301 sec INFO: No Floating Point Exceptions have been reported - 6,598,276,864 cycles # 2.361 GHz - 12,843,019,056 
instructions # 1.95 insn per cycle - 2.795162036 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1706) (512y: 18) (512z: 1427) + 6,645,156,055 cycles # 2.346 GHz + 12,878,593,303 instructions # 1.94 insn per cycle + 2.832875887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 8910beeb75..2b62892e6a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:40:58 +DATE: 2024-08-08_20:08:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.302115e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.150174e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143059e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.451320e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231819e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.130769e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.584501 sec +TOTAL : 0.567390 sec INFO: No Floating Point Exceptions have been reported - 2,297,094,729 cycles # 2.831 GHz - 3,594,137,904 instructions # 1.56 insn per cycle - 0.868040702 seconds time elapsed + 2,325,688,868 cycles # 2.936 GHz + 3,579,904,434 instructions # 1.54 insn per cycle + 0.848470717 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 121 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.706876e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.240008e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.240008e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.665768e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.163815e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.163815e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.041260 sec +TOTAL : 4.146283 sec INFO: No Floating Point Exceptions have been reported - 12,193,704,812 cycles # 3.014 GHz - 32,189,168,438 instructions # 2.64 insn per cycle - 4.046515911 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 303) (avx2: 0) (512y: 0) (512z: 0) + 12,236,614,644 cycles # 2.947 GHz + 32,269,366,728 instructions # 2.64 insn per cycle + 4.152494891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.791622e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.749217e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.749217e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.716868e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.596230e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.596230e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.617891 sec +TOTAL : 2.692634 sec INFO: No Floating Point Exceptions have been reported - 7,979,787,357 cycles # 3.043 GHz - 18,695,484,383 instructions # 2.34 insn per cycle - 2.623236074 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1560) (avx2: 0) (512y: 0) (512z: 0) + 8,040,413,978 cycles # 2.980 GHz + 18,731,295,679 instructions # 2.33 insn per cycle + 2.699009464 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2: 
0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.915587e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.844943e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.844943e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.823808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.734147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.734147e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.513077 sec +TOTAL : 2.599488 sec INFO: No Floating Point Exceptions have been reported - 7,423,731,626 cycles # 2.949 GHz - 14,245,306,500 instructions # 1.92 insn per cycle - 2.518170239 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) + 7,529,267,846 cycles # 2.890 GHz + 14,278,306,013 instructions # 1.90 
insn per cycle + 2.606005161 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2222) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.941748e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.948410e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.948410e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.881055e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.928068e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.928068e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.498266 sec +TOTAL : 2.551515 sec INFO: No Floating Point Exceptions have been reported - 7,289,469,121 cycles # 2.913 GHz - 13,941,958,463 instructions # 1.91 insn per cycle - 2.503658148 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2094) (512y: 3) (512z: 0) + 7,444,338,967 cycles # 2.911 GHz + 13,969,219,259 instructions # 1.88 insn per cycle + 2.557876734 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2074) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.633828e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.112002e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.112002e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.593244e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.031185e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.031185e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.747861 sec +TOTAL : 2.800385 sec INFO: No Floating Point Exceptions have been reported - 6,514,962,868 cycles # 2.368 GHz - 13,424,477,335 
instructions # 2.06 insn per cycle - 2.753003173 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2073) (512y: 1) (512z: 1197) + 6,564,002,113 cycles # 2.339 GHz + 13,450,088,279 instructions # 2.05 insn per cycle + 2.806913095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2056) (512y: 1) (512z: 1197) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 2b9c5c9dab..5ae8d74446 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:41:20 +DATE: 2024-08-08_20:08:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.300237e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.170808e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.204667e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.456866e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267705e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.218590e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.573714 sec +TOTAL : 0.568736 sec INFO: No Floating Point Exceptions have been reported - 2,371,507,340 cycles # 2.957 GHz - 3,677,447,642 instructions # 1.55 insn per cycle - 0.858936652 seconds time elapsed + 2,333,386,939 cycles # 2.946 GHz + 3,651,568,314 instructions # 1.56 insn per cycle + 0.849375970 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.261649e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.281605e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.281605e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.283106e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333262e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333262e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.141273 sec +TOTAL : 3.121614 sec INFO: No Floating Point Exceptions have been reported - 9,341,904,734 cycles # 2.970 GHz - 25,627,115,270 instructions # 2.74 insn per cycle - 3.146581121 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 256) (avx2: 0) (512y: 0) (512z: 0) + 9,386,181,268 cycles # 3.002 GHz + 25,683,181,247 instructions # 2.74 insn per cycle + 3.127889698 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.067172e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.682568e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.682568e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.093996e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.729930e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.729930e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.420676 sec +TOTAL : 2.404675 sec INFO: No Floating Point Exceptions have been reported - 7,248,612,827 cycles # 2.989 GHz - 16,867,376,247 instructions # 2.33 insn per cycle - 2.425928290 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1362) (avx2: 0) (512y: 0) (512z: 0) + 7,273,765,849 cycles # 3.018 GHz + 16,902,173,009 instructions # 2.32 insn per cycle + 2.411177480 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2: 
0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.035973e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.218524e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.218524e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.955814e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.106638e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.106638e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.431224 sec +TOTAL : 2.499207 sec INFO: No Floating Point Exceptions have been reported - 7,140,137,122 cycles # 2.932 GHz - 13,623,202,012 instructions # 1.91 insn per cycle - 2.436415043 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 0) (512z: 0) + 7,265,897,672 cycles # 2.902 GHz + 13,654,744,957 instructions # 1.88 
insn per cycle + 2.505830767 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2046) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.082251e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.384683e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.384683e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.024505e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.340418e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.340418e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.398263 sec +TOTAL : 2.448205 sec INFO: No Floating Point Exceptions have been reported - 7,046,497,716 cycles # 2.933 GHz - 13,426,599,430 instructions # 1.91 insn per cycle - 2.403454610 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1947) (512y: 4) (512z: 0) + 7,137,327,072 cycles # 2.909 GHz + 13,455,725,408 instructions # 1.89 insn per cycle + 2.454335523 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1927) (512y: 4) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.766798e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.419116e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.419116e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.717556e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.328622e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.328622e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.633986 sec +TOTAL : 2.693340 sec INFO: No Floating Point Exceptions have been reported - 6,317,197,476 cycles # 2.394 GHz - 13,153,165,220 
instructions # 2.08 insn per cycle - 2.639245137 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) + 6,390,724,476 cycles # 2.368 GHz + 13,180,968,753 instructions # 2.06 insn per cycle + 2.699833523 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2012) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 5159836f27..dec1886a20 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:16:34 +DATE: 2024-08-08_19:49:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.097692e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.855962e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.153097e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.471546e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.855416e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.166311e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.674608 sec +TOTAL : 0.664565 sec INFO: No Floating Point Exceptions have been reported - 2,603,717,798 cycles # 2.865 GHz - 4,140,407,206 instructions # 1.59 insn per cycle - 0.968093967 seconds time elapsed + 2,673,452,306 cycles # 2.953 GHz + 4,096,581,433 instructions # 1.53 insn per cycle + 0.967198892 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.055185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225988e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225988e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.042304e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.212707e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.212707e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.362286 sec +TOTAL : 6.467559 sec INFO: No Floating Point Exceptions have been reported - 19,341,325,065 cycles # 3.038 GHz - 46,294,100,838 instructions # 2.39 insn per cycle - 6.367819732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 479) (avx2: 0) (512y: 0) (512z: 0) + 19,491,750,695 cycles # 3.010 GHz + 46,366,168,986 instructions # 2.38 insn per cycle + 6.476541865 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.685403e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.221591e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.221591e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.662736e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.194123e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.194123e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.135881 sec +TOTAL : 4.219503 sec INFO: No Floating Point Exceptions have been reported - 12,592,214,789 cycles # 3.041 GHz - 31,477,122,585 instructions # 2.50 insn per cycle - 4.141519100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1732) (avx2: 0) (512y: 0) (512z: 0) + 12,706,673,121 cycles # 3.006 GHz + 31,586,088,348 instructions # 2.49 insn per cycle + 4.228514763 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1720) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.053122e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.867793e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.867793e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.015466e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.812156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.812156e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.464196 sec +TOTAL : 3.548784 sec INFO: No Floating Point Exceptions have been reported - 10,097,562,543 cycles # 2.911 GHz - 19,468,852,516 instructions # 1.93 insn per cycle - 3.469759834 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2133) (512y: 0) (512z: 0) + 10,222,806,702 cycles # 2.874 GHz + 19,575,907,459 
instructions # 1.91 insn per cycle + 3.557713338 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2123) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.083342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.922426e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.922426e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.051557e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.890469e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.890469e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.419745 sec +TOTAL : 3.498884 sec INFO: No Floating Point Exceptions have been reported - 9,936,173,470 cycles # 2.902 GHz - 19,218,686,238 instructions # 1.93 insn per cycle - 3.425268707 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1874) (512y: 189) (512z: 0) + 10,092,991,859 cycles # 2.879 GHz + 19,324,671,897 instructions # 1.91 insn per cycle + 3.507900575 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1866) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.875409e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.530800e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.530800e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.882298e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.563573e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.563573e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.761394 sec +TOTAL : 3.772337 sec INFO: No Floating Point Exceptions have been reported - 8,401,346,330 cycles # 
2.231 GHz - 15,063,802,925 instructions # 1.79 insn per cycle - 3.766578754 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1028) (512y: 154) (512z: 1321) + 8,566,798,073 cycles # 2.266 GHz + 15,161,524,534 instructions # 1.77 insn per cycle + 3.781171342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1044) (512y: 154) (512z: 1321) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index e166c6fc83..e7689b72e7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-06-28_20:17:04 +DATE: 2024-08-08_19:50:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.133743e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.855252e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.188297e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.539005e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.550707e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.172141e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.666632 sec +TOTAL : 0.661474 sec INFO: No Floating Point Exceptions have been reported - 2,676,885,399 cycles # 2.944 GHz - 4,145,864,141 instructions # 1.55 insn per cycle - 0.969606465 seconds time elapsed + 2,649,580,670 cycles # 2.965 GHz + 4,041,332,680 instructions # 1.53 insn per cycle + 0.953046472 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.050080e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.221070e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.221070e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.034608e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.202440e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.394211 sec +TOTAL : 6.513220 sec INFO: No Floating Point Exceptions have been reported - 19,359,472,456 cycles # 3.025 GHz - 46,231,043,339 instructions # 2.39 insn per cycle - 6.399922515 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) + 19,609,702,737 cycles # 3.007 GHz + 46,307,035,647 instructions # 2.36 insn per cycle + 6.522463944 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.676916e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.213764e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.213764e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.657659e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.187172e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.187172e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.157495 sec +TOTAL : 4.231188 sec INFO: No Floating Point Exceptions have been reported - 12,636,464,470 cycles # 3.036 GHz - 31,450,700,410 instructions # 2.49 insn per cycle - 4.162847802 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2: 0) (512y: 0) (512z: 0) + 12,732,843,853 cycles # 3.004 GHz + 31,560,321,434 instructions # 2.48 insn per cycle + 4.240067788 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1712) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.050588e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.855873e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.855873e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.029457e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.843800e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.843800e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.469434 sec +TOTAL : 3.528354 sec INFO: No Floating Point Exceptions have been reported - 10,081,149,851 cycles # 2.902 GHz - 19,455,615,991 instructions # 1.93 insn per cycle - 3.474830271 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2117) (512y: 0) (512z: 0) + 10,258,124,960 cycles # 2.901 GHz + 19,565,249,837 
instructions # 1.91 insn per cycle + 3.537275385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2107) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.076558e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.911672e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.911672e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.049544e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.886035e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886035e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.433307 sec +TOTAL : 3.497781 sec INFO: No Floating Point Exceptions have been reported - 9,909,907,880 cycles # 2.882 GHz - 19,284,186,760 instructions # 1.95 insn per cycle - 3.438922876 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1868) (512y: 189) (512z: 0) + 10,124,826,634 cycles # 2.887 GHz + 19,390,299,312 instructions # 1.92 insn per cycle + 3.507669206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.915935e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.596273e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.596273e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.905533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.593731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.593731e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.685671 sec +TOTAL : 3.733502 sec INFO: No Floating Point Exceptions have been reported - 8,279,654,546 cycles # 
2.244 GHz - 14,978,357,448 instructions # 1.81 insn per cycle - 3.691061407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1021) (512y: 156) (512z: 1305) + 8,422,503,642 cycles # 2.251 GHz + 15,074,129,788 instructions # 1.79 insn per cycle + 3.742530520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1037) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index f2e16bc3a4..34e03e8fe4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:17:33 +DATE: 2024-08-08_19:50:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.887654e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.177294e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279238e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.015578e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167678e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279582e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.527287 sec +TOTAL : 0.520495 sec INFO: No Floating Point Exceptions have been reported - 2,162,264,043 cycles # 2.837 GHz - 3,099,754,233 instructions # 1.43 insn per cycle - 0.818700934 seconds time elapsed + 2,215,808,169 cycles # 2.946 GHz + 3,187,450,258 instructions # 1.44 insn per cycle + 0.809093508 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.882060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.931187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.931187e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.870302e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920397e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920397e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.678336 sec +TOTAL : 5.747684 sec INFO: No Floating Point Exceptions have been reported - 17,196,505,278 cycles # 3.026 GHz - 45,941,679,290 instructions # 2.67 insn per cycle - 5.684096482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 17,324,193,414 cycles # 3.009 GHz + 46,060,464,647 instructions # 2.66 insn per cycle + 5.757711057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.295547e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.463909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.463909e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.256365e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.416045e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416045e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.291073 sec +TOTAL : 3.359278 sec INFO: No Floating Point Exceptions have been reported - 10,024,978,903 cycles # 3.043 GHz - 27,842,089,342 instructions # 2.78 insn per cycle - 3.296637646 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2546) (avx2: 0) (512y: 0) (512z: 0) + 10,153,117,527 cycles # 3.015 GHz + 27,956,665,962 instructions # 2.75 insn per cycle + 3.369058986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.233679e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.639497e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639497e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.128206e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.537547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.537547e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.110457 sec +TOTAL : 2.182924 sec INFO: No Floating Point Exceptions have been reported - 6,078,030,978 cycles # 2.873 GHz - 12,585,249,314 instructions # 2.07 insn per cycle - 2.116164658 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2695) (512y: 0) (512z: 0) + 6,226,289,605 cycles # 2.841 GHz + 12,698,897,797 instructions # 2.04 insn per cycle + 2.192278719 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.733341e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.227942e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.227942e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.605220e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.105851e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.105851e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.936443 sec +TOTAL : 2.009834 sec INFO: No Floating Point Exceptions have been reported - 5,569,510,614 cycles # 2.869 GHz - 12,022,484,967 instructions # 2.16 insn per cycle - 1.942002573 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 144) (512z: 0) + 5,688,710,640 cycles # 2.818 GHz + 12,134,437,252 instructions # 2.13 insn per cycle + 2.019506075 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2350) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.719724e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.922615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.922615e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.669310e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.868262e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.868262e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.926284 sec +TOTAL : 2.997018 sec INFO: No Floating Point Exceptions have been reported - 5,708,872,761 cycles # 1.948 GHz - 8,296,017,452 instructions # 1.45 insn per cycle - 2.931816776 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 122) (512z: 1801) + 5,821,558,239 cycles # 1.938 
GHz + 8,411,130,761 instructions # 1.44 insn per cycle + 3.006784964 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 122) (512z: 1801) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 2020a39d40..20904d51fd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:50:20 +DATE: 2024-08-08_20:17:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.667501e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.207084e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.207084e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.670983e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.294260e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.294260e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.803011 sec +TOTAL : 0.801021 sec INFO: No Floating Point Exceptions have been reported - 3,110,018,268 cycles # 2.979 GHz - 4,839,889,726 instructions # 1.56 insn per cycle - 1.102667488 seconds time elapsed + 3,080,158,706 cycles # 2.935 GHz + 4,797,683,266 instructions # 1.56 insn per cycle + 1.107754362 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -70,8 +70,10 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -89,20 +91,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.881108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.929315e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.929315e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.860613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.909257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909257e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.762355 sec +TOTAL : 5.862697 sec INFO: No Floating Point Exceptions have been reported - 17,562,906,762 cycles # 3.045 GHz - 46,001,411,072 instructions # 2.62 insn per cycle - 5.769657879 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 17,649,346,443 cycles # 3.005 GHz + 46,130,000,854 instructions # 2.61 insn per cycle + 5.874952134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -118,20 +121,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.229172e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.389158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.389158e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.216658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.372905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372905e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.435776 sec +TOTAL : 3.488934 sec INFO: No Floating Point Exceptions have been reported - 10,363,946,589 cycles 
# 3.011 GHz - 28,026,973,873 instructions # 2.70 insn per cycle - 3.442976592 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2546) (avx2: 0) (512y: 0) (512z: 0) + 10,528,637,782 cycles # 3.008 GHz + 28,161,635,226 instructions # 2.67 insn per cycle + 3.501603953 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -147,20 +151,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.097098e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.485023e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.485023e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.020861e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.404928e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.404928e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.247172 sec +TOTAL : 2.319862 sec 
INFO: No Floating Point Exceptions have been reported - 6,480,280,232 cycles # 2.876 GHz - 12,872,958,029 instructions # 1.99 insn per cycle - 2.254448726 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2695) (512y: 0) (512z: 0) + 6,615,013,287 cycles # 2.835 GHz + 13,014,509,842 instructions # 1.97 insn per cycle + 2.334044597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -176,20 +181,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.611591e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.073609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.073609e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.540790e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.009639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.009639e+05 ) sec^-1 MeanMatrixElemValue = ( 
2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.055493 sec +TOTAL : 2.122719 sec INFO: No Floating Point Exceptions have been reported - 5,963,252,373 cycles # 2.892 GHz - 12,307,269,566 instructions # 2.06 insn per cycle - 2.062562699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 144) (512z: 0) + 6,074,435,637 cycles # 2.845 GHz + 12,446,562,239 instructions # 2.05 insn per cycle + 2.135603783 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2350) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -205,20 +211,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.494504e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.674936e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674936e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.615591e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807268e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.197110 sec +TOTAL : 3.133757 sec INFO: No Floating Point Exceptions have been reported - 6,101,726,119 cycles # 1.909 GHz - 8,542,052,342 instructions # 1.40 insn per cycle - 3.204459961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 122) (512z: 1801) + 6,213,946,932 cycles # 1.975 GHz + 8,678,322,888 instructions # 1.40 insn per cycle + 3.146596624 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 122) (512z: 1801) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 786b897430..278ba4b157 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:01:12 +DATE: 2024-08-08_20:29:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.755803e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168375e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.278071e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.861886e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169373e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276724e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.616757 sec +TOTAL : 0.622862 sec INFO: No Floating Point Exceptions have been reported - 2,520,167,761 cycles # 2.979 GHz - 3,681,164,233 instructions # 1.46 insn per cycle - 0.903907581 seconds time elapsed + 2,496,588,832 cycles # 2.937 GHz + 3,616,944,645 instructions # 1.45 insn per cycle + 0.908999824 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.896601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.946013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.946013e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.858770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.906877e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.906877e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.694069 sec +TOTAL : 5.824941 sec INFO: No Floating Point Exceptions have been reported - 17,355,363,175 cycles # 3.046 GHz - 45,958,013,397 instructions # 2.65 insn per cycle - 5.699661141 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 17,438,858,484 cycles # 2.991 GHz + 46,011,567,715 instructions # 2.64 insn per cycle + 5.831016559 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.289445e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.461820e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.461820e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.238383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.396939e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.396939e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.358370 sec +TOTAL : 3.423850 sec INFO: No Floating Point Exceptions have been reported - 10,180,396,568 cycles # 3.027 GHz - 27,841,400,891 instructions # 2.73 insn per cycle - 3.363996103 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2546) (avx2: 0) (512y: 0) (512z: 0) + 10,272,842,406 cycles # 2.996 GHz + 27,901,302,334 instructions # 2.72 insn per cycle + 3.429671541 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.198567e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.612974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.612974e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.121821e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.516246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.516246e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.185592 sec +TOTAL : 2.235299 sec INFO: No Floating Point Exceptions have been reported - 6,286,458,818 cycles # 2.870 GHz - 12,568,024,915 instructions # 2.00 insn per cycle - 2.191070415 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2695) (512y: 0) (512z: 0) + 6,354,923,604 cycles # 2.835 GHz + 12,634,246,195 instructions # 1.99 insn per cycle + 2.242096681 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.698104e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.182473e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.182473e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.585808e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.053603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.053603e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.010132 sec +TOTAL : 2.059756 sec INFO: No Floating Point Exceptions have been reported - 5,785,299,045 cycles # 2.872 GHz - 11,972,108,812 instructions # 2.07 insn per cycle - 2.015720254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 144) (512z: 0) + 5,815,690,450 cycles # 2.817 GHz + 
12,015,299,257 instructions # 2.07 insn per cycle + 2.065558377 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2350) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.726032e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.924861e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.924861e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.643854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.839235e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.839235e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.982171 sec +TOTAL : 3.061355 sec INFO: No Floating Point Exceptions have been reported - 5,927,841,436 cycles # 1.985 GHz - 8,247,377,383 instructions # 1.39 insn per cycle - 2.987679420 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 1459) (512y: 122) (512z: 1801) + 5,933,052,882 cycles # 1.935 GHz + 8,290,148,322 instructions # 1.40 insn per cycle + 3.067159573 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 122) (512z: 1801) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 5f6d720529..fba3b57280 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:58:28 +DATE: 2024-08-08_20:26:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.724595e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166575e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277602e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.905617e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179466e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279851e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.558679 sec +TOTAL : 0.555994 sec INFO: No Floating Point Exceptions have been reported - 2,338,202,954 cycles # 2.969 GHz - 3,647,650,532 instructions # 1.56 insn per cycle - 0.844681870 seconds time elapsed + 2,284,248,162 cycles # 2.910 GHz + 3,522,733,929 instructions # 1.54 insn per cycle + 0.842109172 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.890469e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.939333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.939333e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.864505e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911828e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.653742 sec +TOTAL : 5.728269 sec INFO: No Floating Point Exceptions have been reported - 17,201,984,378 cycles # 3.040 GHz - 45,941,543,910 instructions # 2.67 insn per cycle - 5.659339476 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 17,201,286,704 cycles # 3.001 GHz + 45,937,216,481 instructions # 2.67 insn per cycle + 5.733811627 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.308666e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.474491e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.474491e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.250062e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410672e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.276689 sec +TOTAL : 3.334295 sec INFO: No Floating Point Exceptions have been reported - 10,031,778,452 cycles # 3.057 GHz - 27,843,325,740 instructions # 2.78 insn per cycle - 3.282220552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2546) (avx2: 0) (512y: 0) (512z: 0) + 10,038,224,892 cycles # 3.006 GHz + 27,841,209,673 instructions # 2.77 insn per cycle + 3.340129450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.201736e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.604981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.604981e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.145160e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.541205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.541205e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.124462 sec +TOTAL : 2.147149 sec INFO: No Floating Point Exceptions have been reported - 6,110,813,008 cycles # 2.870 GHz - 12,584,901,063 instructions # 2.06 insn per cycle - 2.130089416 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2695) (512y: 0) (512z: 0) + 6,102,474,947 cycles # 2.835 GHz + 12,591,341,324 instructions # 2.06 insn per cycle + 2.153315340 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.746310e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.244054e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.244054e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.639021e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.126234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.126234e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.934375 sec +TOTAL : 1.968961 sec INFO: No Floating Point Exceptions have been reported - 5,581,726,506 cycles # 2.878 GHz - 12,022,590,996 instructions # 2.15 insn per cycle - 1.940001347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 144) (512z: 0) + 5,608,749,777 cycles # 2.841 GHz + 
12,024,185,128 instructions # 2.14 insn per cycle + 1.975078079 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2350) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.758674e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.959974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.959974e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.641587e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.834103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.834103e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.897270 sec +TOTAL : 2.988248 sec INFO: No Floating Point Exceptions have been reported - 5,727,759,334 cycles # 1.974 GHz - 8,296,479,561 instructions # 1.45 insn per cycle - 2.902844422 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 1459) (512y: 122) (512z: 1801) + 5,720,578,029 cycles # 1.911 GHz + 8,299,459,915 instructions # 1.45 insn per cycle + 2.994289958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 122) (512z: 1801) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index f356364f29..9e3fe4acb0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:55:48 +DATE: 2024-08-08_20:23:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,22 +50,24 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.908224e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168760e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.278988e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.032256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.173338e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277454e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.708513 sec +TOTAL : 0.705014 sec INFO: No Floating Point Exceptions have been reported - 2,788,384,939 cycles # 2.977 GHz - 4,369,585,987 instructions # 1.57 insn per cycle - 0.994652751 seconds time elapsed + 2,749,776,676 cycles # 2.945 GHz + 4,325,337,591 instructions # 1.57 insn per cycle + 0.991327218 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -82,20 +84,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.898323e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947064e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.868158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.916528e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.916528e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.630810 sec +TOTAL : 5.717662 sec INFO: No Floating Point Exceptions have been reported - 17,199,246,494 cycles # 3.053 GHz - 45,942,002,374 instructions # 2.67 insn per cycle - 5.636616917 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 17,178,289,091 cycles # 3.002 GHz + 45,937,241,973 instructions # 2.67 insn per cycle + 5.723215350 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.281603e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.445852e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.445852e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.231136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391441e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.304211 sec +TOTAL : 3.354044 sec INFO: No Floating Point Exceptions have been reported - 10,005,316,559 cycles # 3.024 
GHz - 27,841,805,216 instructions # 2.78 insn per cycle - 3.310216435 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2546) (avx2: 0) (512y: 0) (512z: 0) + 10,031,479,526 cycles # 2.986 GHz + 27,844,808,096 instructions # 2.78 insn per cycle + 3.359952965 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -138,20 +142,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.157548e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.551032e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.551032e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.099162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.490827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.490827e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.141315 sec +TOTAL : 2.161987 sec INFO: No 
Floating Point Exceptions have been reported - 6,097,247,438 cycles # 2.841 GHz - 12,585,204,824 instructions # 2.06 insn per cycle - 2.147048600 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2695) (512y: 0) (512z: 0) + 6,083,392,852 cycles # 2.808 GHz + 12,576,453,088 instructions # 2.07 insn per cycle + 2.167500908 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +171,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.738562e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.232955e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.232955e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.632481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.118699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.118699e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 
+- 3.413217e-03 ) GeV^0 -TOTAL : 1.934942 sec +TOTAL : 1.966228 sec INFO: No Floating Point Exceptions have been reported - 5,595,518,172 cycles # 2.884 GHz - 12,020,747,019 instructions # 2.15 insn per cycle - 1.940748260 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 144) (512z: 0) + 5,587,261,117 cycles # 2.835 GHz + 12,016,452,187 instructions # 2.15 insn per cycle + 1.971550633 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2350) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -194,20 +200,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.575127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.766861e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.766861e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.687020e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882322e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.882322e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.043164 sec +TOTAL : 2.948998 sec INFO: No Floating Point Exceptions have been reported - 5,727,292,763 cycles # 1.880 GHz - 8,298,018,766 instructions # 1.45 insn per cycle - 3.048744350 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1459) (512y: 122) (512z: 1801) + 5,710,948,756 cycles # 1.934 GHz + 8,289,147,048 instructions # 1.45 insn per cycle + 2.954636423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 122) (512z: 1801) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 886aa2766b..dd8639d462 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:17:58 +DATE: 2024-08-08_19:51:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.892000e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175238e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276354e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.953365e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169057e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275879e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.517809 sec +TOTAL : 0.516826 sec INFO: No Floating Point Exceptions have been reported - 2,225,604,920 cycles # 2.968 GHz - 3,221,516,712 instructions # 1.45 insn per cycle - 0.807006941 seconds time elapsed + 2,205,203,774 cycles # 2.951 GHz + 3,179,876,331 instructions # 1.44 insn per cycle + 0.803907668 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002156e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.926342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.977633e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.977633e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.482127 sec +TOTAL : 5.581069 sec INFO: No Floating Point Exceptions have been reported - 16,704,752,814 cycles # 3.045 GHz - 44,935,943,922 instructions # 2.69 insn per cycle - 5.487861081 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 581) (avx2: 0) (512y: 0) (512z: 0) + 16,849,073,106 cycles # 3.014 GHz + 45,045,731,432 instructions # 2.67 insn per cycle + 5.590685845 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.473548e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.656412e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656412e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.423058e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.602908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602908e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.126169 sec +TOTAL : 3.201422 sec INFO: No Floating Point Exceptions have been reported - 9,527,553,742 cycles # 3.043 GHz - 26,700,808,818 instructions # 2.80 insn per cycle - 3.131739636 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2343) (avx2: 0) (512y: 0) (512z: 0) + 9,674,035,774 cycles # 3.013 GHz + 26,815,165,030 instructions # 2.77 insn per cycle + 3.211231348 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2331) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.762240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.097629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.097629e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.649217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.990962e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.990962e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.308155 sec +TOTAL : 2.396466 sec INFO: No Floating Point Exceptions have been reported - 6,597,424,472 cycles # 2.852 GHz - 14,122,933,490 instructions # 2.14 insn per cycle - 2.313712672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2786) (512y: 0) (512z: 0) + 6,732,899,102 cycles # 2.799 GHz + 14,237,973,279 instructions # 2.11 insn per cycle + 2.406196706 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2703) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.975505e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.343599e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.343599e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.923382e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.291610e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.291610e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.214876 sec +TOTAL : 2.269821 sec INFO: No Floating Point Exceptions have been reported - 6,337,916,387 cycles # 2.856 GHz - 13,710,130,386 instructions # 2.16 insn per cycle - 2.220426524 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2437) (512y: 297) (512z: 0) + 6,473,185,925 cycles # 2.841 GHz + 13,823,290,533 instructions # 2.14 insn per cycle + 2.279550700 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2349) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.536585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.718678e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.718678e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.570682e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.758312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.758312e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.073115 sec +TOTAL : 3.077054 sec INFO: No Floating Point Exceptions have been reported - 5,918,243,053 cycles # 1.923 GHz - 10,064,016,017 instructions # 1.70 insn per cycle - 3.078700521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1291) (512y: 208) (512z: 1987) + 6,015,923,061 cycles # 
1.950 GHz + 10,176,638,000 instructions # 1.69 insn per cycle + 3.086647254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1261) (512y: 208) (512z: 1987) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 1f5555d1f3..1d562b1c51 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:41:41 +DATE: 2024-08-08_20:08:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.588122e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.164655e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281010e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.079454e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184027e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.281167e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525240 sec +TOTAL : 0.525003 sec INFO: No Floating Point Exceptions have been reported - 2,248,450,640 cycles # 2.948 GHz - 3,243,362,318 instructions # 1.44 insn per cycle - 0.819380731 seconds time elapsed + 2,200,806,347 cycles # 2.912 GHz + 3,172,188,132 instructions # 1.44 insn per cycle + 0.814200484 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.496274e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.581602e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.581602e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.477886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.565553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.565553e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.308010 sec +TOTAL : 4.370943 sec INFO: No Floating Point Exceptions have been reported - 13,011,911,332 cycles # 3.017 GHz - 34,349,737,743 instructions # 2.64 insn per cycle - 4.313568297 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 678) (avx2: 0) (512y: 0) (512z: 0) + 13,117,582,836 cycles # 2.995 GHz + 34,450,679,536 instructions # 2.63 insn per cycle + 4.380756610 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.074252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.216149e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.216149e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.033084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.174712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174712e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.518078 sec +TOTAL : 3.593818 sec INFO: No Floating Point Exceptions have been reported - 10,696,750,210 cycles # 3.037 GHz - 24,006,049,306 instructions # 2.24 insn per cycle - 3.523505209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) + 10,811,449,443 cycles # 3.001 GHz + 24,123,594,949 instructions # 2.23 insn per cycle + 3.603506153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.789528e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.126618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.126618e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.731678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.069353e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.069353e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.295706 sec +TOTAL : 2.354482 sec INFO: No Floating Point Exceptions have been reported - 6,575,533,125 cycles # 2.859 GHz - 12,347,200,737 instructions # 1.88 insn per cycle - 2.301423963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3121) (512y: 0) (512z: 0) + 6,707,294,523 cycles # 2.838 GHz + 12,465,505,098 instructions # 1.86 insn per cycle + 2.364349203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3096) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.011477e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.379769e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.379769e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.061977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.447561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.447561e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.199416 sec +TOTAL : 2.207748 sec INFO: No Floating Point Exceptions have been reported - 6,148,102,598 cycles # 2.790 GHz - 11,570,413,212 instructions # 1.88 insn per cycle - 2.204830710 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2671) (512y: 239) (512z: 0) + 6,305,288,080 cycles # 2.845 GHz + 11,685,678,996 instructions # 1.85 insn per cycle + 2.217142463 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2640) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.881899e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.105309e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.105309e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.929117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.157594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.157594e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.810767 sec +TOTAL : 2.806226 sec INFO: No Floating Point Exceptions have been reported - 5,374,795,520 cycles # 1.909 GHz - 9,285,133,018 instructions # 1.73 insn per cycle - 2.816377054 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2114) (512y: 282) (512z: 1954) + 5,500,190,609 cycles # 1.954 
GHz + 9,401,836,893 instructions # 1.71 insn per cycle + 2.816415768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 282) (512z: 1954) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 6c085aaca8..65dd600686 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:42:05 +DATE: 2024-08-08_20:09:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.572873e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158466e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276623e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.067308e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179547e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276758e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.522810 sec +TOTAL : 0.523442 sec INFO: No Floating Point Exceptions have been reported - 2,250,104,415 cycles # 2.965 GHz - 3,184,818,143 instructions # 1.42 insn per cycle - 0.815485646 seconds time elapsed + 2,203,163,418 cycles # 2.923 GHz + 3,173,114,436 instructions # 1.44 insn per cycle + 0.812619708 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.660610e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.760897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.760897e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.597347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.694908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.694908e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.049986 sec +TOTAL : 4.173436 sec INFO: No Floating Point Exceptions have been reported - 12,325,342,169 cycles # 3.040 GHz - 34,919,769,447 instructions # 2.83 insn per cycle - 4.055421218 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 443) (avx2: 0) (512y: 0) (512z: 0) + 12,532,788,513 cycles # 2.997 GHz + 35,033,869,738 instructions # 2.80 insn per cycle + 4.183331959 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.076627e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.218560e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.218560e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.046469e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.187931e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.187931e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.515029 sec +TOTAL : 3.579716 sec INFO: No Floating Point Exceptions have been reported - 10,701,072,922 cycles # 3.040 GHz - 23,007,343,701 instructions # 2.15 insn per cycle - 3.520527508 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2349) (avx2: 0) (512y: 0) (512z: 0) + 10,790,492,364 cycles # 3.007 GHz + 23,124,229,685 instructions # 2.14 insn per cycle + 3.589416563 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2339) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.085752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.472257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.472257e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.059739e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.450926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.450926e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.171429 sec +TOTAL : 2.211695 sec INFO: No Floating Point Exceptions have been reported - 6,197,707,325 cycles # 2.848 GHz - 11,955,784,112 instructions # 1.93 insn per cycle - 2.176803625 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2509) (512y: 0) (512z: 0) + 6,295,892,975 cycles # 2.836 GHz + 12,072,618,893 instructions # 1.92 insn per cycle + 2.220989978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2484) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.236227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.640130e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.640130e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.997474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.374849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.374849e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.108665 sec +TOTAL : 2.235122 sec INFO: No Floating Point Exceptions have been reported - 6,042,683,756 cycles # 2.860 GHz - 11,130,717,499 instructions # 1.84 insn per cycle - 2.114024511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2126) (512y: 174) (512z: 0) + 6,279,000,139 cycles # 2.798 GHz + 11,243,252,484 instructions # 1.79 insn per cycle + 2.244690704 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2095) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.112043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.358996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.358996e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.095312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.342354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.342354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.657682 sec +TOTAL : 2.697672 sec INFO: No Floating Point Exceptions have been reported - 5,213,529,009 cycles # 1.959 GHz - 9,023,567,185 instructions # 1.73 insn per cycle - 2.663130028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1650) (512y: 208) (512z: 1570) + 5,310,077,423 cycles # 1.962 
GHz + 9,140,837,043 instructions # 1.72 insn per cycle + 2.707468994 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 208) (512z: 1570) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 3c812282d1..38766f6059 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:18:22 +DATE: 2024-08-08_19:51:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.971515e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.203043e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.398328e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.614637e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.196490e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391083e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.481274 sec +TOTAL : 0.477185 sec INFO: No Floating Point Exceptions have been reported - 2,080,512,591 cycles # 2.958 GHz - 2,994,385,775 instructions # 1.44 insn per cycle - 0.762196231 seconds time elapsed + 2,083,240,592 cycles # 2.927 GHz + 2,954,253,066 instructions # 1.42 insn per cycle + 0.768394565 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.000726e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.056957e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.056957e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.972261e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.028190e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028190e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.325245 sec +TOTAL : 5.413447 sec INFO: No Floating Point Exceptions have been reported - 16,233,475,504 cycles # 3.046 GHz - 45,338,704,501 instructions # 2.79 insn per cycle - 5.330482823 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 605) (avx2: 0) (512y: 0) (512z: 0) + 16,298,510,952 cycles # 3.008 GHz + 45,383,093,310 instructions # 2.78 insn per cycle + 5.420499578 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.688881e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.042825e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.042825e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.516274e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.853993e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.853993e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.322133 sec +TOTAL : 2.420950 sec INFO: No Floating Point Exceptions have been reported - 7,081,033,131 cycles # 3.044 GHz - 17,775,454,632 instructions # 2.51 insn per cycle - 2.327454749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3151) (avx2: 0) (512y: 0) (512z: 0) + 7,111,183,634 cycles # 2.930 GHz + 17,819,948,567 instructions # 2.51 insn per cycle + 2.427658659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.187763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.285010e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.285010e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.607320e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.824778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.824778e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.367437 sec +TOTAL : 1.317016 sec INFO: No Floating Point Exceptions have been reported - 3,744,621,805 cycles # 2.730 GHz - 8,265,608,992 instructions # 2.21 insn per cycle - 1.372764201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3374) (512y: 0) (512z: 0) + 3,802,543,905 cycles # 2.874 GHz + 8,308,913,768 instructions # 2.19 insn per cycle + 1.323729586 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3350) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.188890e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.051258e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.051258e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.087676e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.047463e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.225759 sec +TOTAL : 1.251137 sec INFO: No Floating Point Exceptions have been reported - 3,547,894,530 cycles # 2.883 GHz - 7,919,949,757 instructions # 2.23 insn per cycle - 1.231119552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3226) (512y: 20) (512z: 0) + 3,608,199,910 cycles # 2.871 GHz + 7,963,896,839 instructions # 2.21 insn per cycle + 1.257792419 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3196) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.887488e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.593324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.593324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.851468e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.561768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.561768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.606939 sec +TOTAL : 1.629029 sec INFO: No Floating Point Exceptions have been reported - 3,254,587,487 cycles # 2.020 GHz - 6,098,819,103 instructions # 1.87 insn per cycle - 1.612213485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2400) (512y: 24) (512z: 2152) + 3,306,960,550 cycles # 2.023 GHz + 
6,143,321,587 instructions # 1.86 insn per cycle + 1.635836688 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2287) (512y: 24) (512z: 2153) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 8ffba60f10..87c93d2ebd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:50:45 +DATE: 2024-08-08_20:18:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.207285e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.559811e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.559811e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.181597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.725510e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.725510e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.666449 sec +TOTAL : 0.672294 sec INFO: No Floating Point Exceptions have been reported - 2,675,605,031 cycles # 2.990 GHz - 4,135,337,657 instructions # 1.55 insn per cycle - 0.951611952 seconds time elapsed + 2,617,099,456 cycles # 2.904 GHz + 4,062,920,786 instructions # 1.55 insn per cycle + 0.957784001 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -70,8 +70,10 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -89,20 +91,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.790072e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.842494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.842494e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.956957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.011198e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011198e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 6.016259 sec +TOTAL : 5.484325 sec INFO: No Floating Point Exceptions have been reported - 16,448,241,050 cycles # 2.861 GHz - 45,389,130,693 instructions # 2.76 insn per cycle - 6.023243626 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 605) (avx2: 0) (512y: 0) (512z: 0) + 16,490,289,692 cycles # 3.004 GHz + 45,381,699,221 instructions # 2.75 insn per cycle + 5.490323533 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -118,20 +121,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.342252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.671673e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.671673e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.582859e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.920444e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.920444e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.552581 sec +TOTAL : 2.418229 sec INFO: No Floating Point Exceptions have been reported - 7,264,559,534 cycles # 
2.868 GHz - 18,055,626,229 instructions # 2.49 insn per cycle - 2.558761668 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3151) (avx2: 0) (512y: 0) (512z: 0) + 7,267,277,115 cycles # 2.998 GHz + 18,050,295,436 instructions # 2.48 insn per cycle + 2.424701000 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -147,20 +151,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.885868e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.955691e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.955691e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.393268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.547596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.547596e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.482805 sec +TOTAL : 1.379855 sec 
INFO: No Floating Point Exceptions have been reported - 3,959,135,121 cycles # 2.691 GHz - 8,502,209,023 instructions # 2.15 insn per cycle - 1.491550395 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3374) (512y: 0) (512z: 0) + 3,938,588,665 cycles # 2.843 GHz + 8,495,556,645 instructions # 2.16 insn per cycle + 1.386260790 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3350) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -176,20 +181,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.125003e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.251893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.251893e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.873570e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.014552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.014552e+06 ) sec^-1 MeanMatrixElemValue = ( 
2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.434348 sec +TOTAL : 1.313964 sec INFO: No Floating Point Exceptions have been reported - 3,772,960,788 cycles # 2.685 GHz - 8,159,229,827 instructions # 2.16 insn per cycle - 1.441299263 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3226) (512y: 20) (512z: 0) + 3,770,505,615 cycles # 2.857 GHz + 8,157,653,367 instructions # 2.16 insn per cycle + 1.320625840 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3196) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -205,20 +211,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.909757e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.503087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.503087e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.668614e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.340392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.340392e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.941680 sec +TOTAL : 1.706792 sec INFO: No Floating Point Exceptions have been reported - 3,466,655,106 cycles # 1.848 GHz - 6,354,787,813 instructions # 1.83 insn per cycle - 1.949951282 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2400) (512y: 24) (512z: 2152) + 3,475,092,320 cycles # 2.029 GHz + 6,350,458,775 instructions # 1.83 insn per cycle + 1.713327675 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2287) (512y: 24) (512z: 2153) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 8628d648bd..a8425bb782 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:01:37 +DATE: 2024-08-08_20:29:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.877973e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.175805e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.383438e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.044161e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.197356e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.390140e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.567054 sec +TOTAL : 0.573091 sec INFO: No Floating Point Exceptions have been reported - 2,330,915,980 cycles # 2.965 GHz - 3,416,048,789 instructions # 1.47 insn per cycle - 0.844941386 seconds time elapsed + 2,302,500,947 cycles # 2.899 GHz + 3,359,714,134 instructions # 1.46 insn per cycle + 0.851330175 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.006766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.063518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.063518e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.971169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027848e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.365120 sec +TOTAL : 5.460102 sec INFO: No Floating Point Exceptions have been reported - 16,409,243,058 cycles # 3.056 GHz - 45,368,159,236 instructions # 2.76 insn per cycle - 5.370408040 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 605) (avx2: 0) (512y: 0) (512z: 0) + 16,412,251,635 cycles # 3.004 GHz + 45,363,438,738 instructions # 2.76 insn per cycle + 5.465223733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.694480e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.046550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.046550e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.639399e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.984668e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.984668e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.374059 sec +TOTAL : 2.397788 sec INFO: No Floating Point Exceptions have been reported - 7,244,886,908 cycles # 3.046 GHz - 17,787,068,117 instructions # 2.46 insn per cycle - 2.379307738 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3151) (avx2: 0) (512y: 0) (512z: 0) + 7,225,778,706 cycles # 3.008 GHz + 17,780,590,298 instructions # 2.46 insn per cycle + 2.402807836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.623336e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.814218e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.814218e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.542458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.724935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.724935e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.356912 sec +TOTAL : 1.365171 sec INFO: No Floating Point Exceptions have been reported - 3,928,174,846 cycles # 2.885 GHz - 8,249,136,865 instructions # 2.10 insn per cycle - 1.362262336 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3374) (512y: 0) (512z: 0) + 3,905,630,598 cycles # 2.852 GHz + 8,242,044,959 instructions # 2.11 insn per cycle + 1.370327142 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3350) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.101855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043662e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.043662e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.995768e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.031926e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031926e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.293228 sec +TOTAL : 1.306099 sec INFO: No Floating Point Exceptions have been reported - 3,737,571,332 cycles # 2.880 GHz - 7,870,738,692 instructions # 2.11 insn per cycle - 1.298462511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3226) (512y: 20) (512z: 0) + 3,721,703,946 cycles # 2.840 GHz + 
7,863,594,201 instructions # 2.11 insn per cycle + 1.311330370 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3196) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.862282e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.577950e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.577950e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.758543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.446976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.446976e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.670875 sec +TOTAL : 1.692116 sec INFO: No Floating Point Exceptions have been reported - 3,429,920,435 cycles # 2.047 GHz - 6,049,639,626 instructions # 1.76 insn per cycle - 1.676119731 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2400) (512y: 24) (512z: 2152) + 3,425,904,021 cycles # 2.019 GHz + 6,042,797,691 instructions # 1.76 insn per cycle + 1.697363173 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2287) (512y: 24) (512z: 2153) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index bff07c6868..a9cab1763c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:58:52 +DATE: 2024-08-08_20:26:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.792680e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.180620e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.394308e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.225239e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.197913e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.389129e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.510503 sec +TOTAL : 0.517448 sec INFO: No Floating Point Exceptions have been reported - 2,177,491,273 cycles # 2.975 GHz - 3,411,887,204 instructions # 1.57 insn per cycle - 0.788564891 seconds time elapsed + 2,112,624,842 cycles # 2.859 GHz + 3,317,853,292 instructions # 1.57 insn per cycle + 0.795716447 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.987602e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.045188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.045188e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.922136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.976186e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976186e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.361967 sec +TOTAL : 5.540195 sec INFO: No Floating Point Exceptions have been reported - 16,253,507,490 cycles # 3.029 GHz - 45,338,452,684 instructions # 2.79 insn per cycle - 5.367358373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 605) (avx2: 0) (512y: 0) (512z: 0) + 16,275,080,243 cycles # 2.936 GHz + 45,337,789,928 instructions # 2.79 insn per cycle + 5.545390256 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.531253e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.854760e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.854760e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.488675e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.824628e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.824628e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.399620 sec +TOTAL : 2.422437 sec INFO: No Floating Point Exceptions have been reported - 7,075,172,269 cycles # 2.944 GHz - 17,774,820,782 instructions # 2.51 insn per cycle - 2.404849962 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3151) (avx2: 0) (512y: 0) (512z: 0) + 7,052,758,354 cycles # 2.906 GHz + 17,767,509,302 instructions # 2.52 insn per cycle + 2.427864435 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.656451e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.855005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.855005e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.294778e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.430722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.430722e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.296967 sec +TOTAL : 1.350098 sec INFO: No Floating Point Exceptions have been reported - 3,759,549,216 cycles # 2.888 GHz - 8,264,701,307 instructions # 2.20 insn per cycle - 1.302290635 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3374) (512y: 0) (512z: 0) + 3,737,878,511 cycles # 2.759 GHz + 8,257,495,819 instructions # 2.21 insn per cycle + 1.355605620 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3350) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.175558e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053477e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053477e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.700373e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.969590e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.969590e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.226419 sec +TOTAL : 1.290488 sec INFO: No Floating Point Exceptions have been reported - 3,556,498,736 cycles # 2.889 GHz - 7,920,503,592 instructions # 2.23 insn per cycle - 1.231741361 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3226) (512y: 20) (512z: 0) + 3,556,397,958 cycles # 2.746 GHz + 
7,911,980,107 instructions # 2.22 insn per cycle + 1.296127398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3196) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.953556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.671434e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.671434e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.356565e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.990428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.990428e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.592884 sec +TOTAL : 1.736165 sec INFO: No Floating Point Exceptions have been reported - 3,261,158,464 cycles # 2.042 GHz - 6,099,877,801 instructions # 1.87 insn per cycle - 1.598103261 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2400) (512y: 24) (512z: 2152) + 3,256,937,975 cycles # 1.871 GHz + 6,093,354,447 instructions # 1.87 insn per cycle + 1.741565922 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2287) (512y: 24) (512z: 2153) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 3fc0fbbc6c..1b7d56c0f4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:56:13 +DATE: 2024-08-08_20:23:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,22 +50,24 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.903263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.184841e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.384340e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.925974e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.195417e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383637e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.613170 sec +TOTAL : 0.617651 sec INFO: No Floating Point Exceptions have been reported - 2,489,925,895 cycles # 2.985 GHz - 3,862,378,401 instructions # 1.55 insn per cycle - 0.890452214 seconds time elapsed + 2,472,700,101 cycles # 2.956 GHz + 3,844,270,088 instructions # 1.55 insn per cycle + 0.895131936 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -82,20 +84,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.005717e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.061976e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.061976e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.959227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.014297e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.014297e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.312002 sec +TOTAL : 5.435139 sec INFO: No Floating Point Exceptions have been reported - 16,240,467,534 cycles # 3.055 GHz - 45,338,534,619 instructions # 2.79 insn per cycle - 5.317436580 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 605) (avx2: 0) (512y: 0) (512z: 0) + 16,264,887,736 cycles # 2.990 GHz + 45,334,381,661 instructions # 2.79 insn per cycle + 5.440210307 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.653354e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.006022e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.006022e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.519066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.848466e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.848466e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.338600 sec +TOTAL : 2.405682 sec INFO: No Floating Point Exceptions have been reported - 7,086,830,161 cycles # 3.025 
GHz - 17,776,494,030 instructions # 2.51 insn per cycle - 2.344006781 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3151) (avx2: 0) (512y: 0) (512z: 0) + 7,056,903,182 cycles # 2.928 GHz + 17,767,514,446 instructions # 2.52 insn per cycle + 2.410973137 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -138,20 +142,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.547528e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.715398e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.715398e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.565756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.749553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.749553e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.312500 sec +TOTAL : 1.305436 sec INFO: No 
Floating Point Exceptions have been reported - 3,749,385,603 cycles # 2.846 GHz - 8,264,672,474 instructions # 2.20 insn per cycle - 1.317884402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3374) (512y: 0) (512z: 0) + 3,753,143,327 cycles # 2.865 GHz + 8,257,983,801 instructions # 2.20 insn per cycle + 1.310628316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3350) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +171,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.067824e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042356e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042356e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.040312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.036836e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.036836e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 
3.414231e-03 ) GeV^0 -TOTAL : 1.242601 sec +TOTAL : 1.242569 sec INFO: No Floating Point Exceptions have been reported - 3,563,693,975 cycles # 2.857 GHz - 7,919,771,531 instructions # 2.22 insn per cycle - 1.248059174 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3226) (512y: 20) (512z: 0) + 3,552,004,540 cycles # 2.848 GHz + 7,912,724,917 instructions # 2.23 insn per cycle + 1.247741947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3196) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -194,20 +200,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.790143e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.492603e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.492603e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.813901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.506813e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.506813e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.632153 sec +TOTAL : 1.621227 sec INFO: No Floating Point Exceptions have been reported - 3,268,889,948 cycles # 1.997 GHz - 6,099,013,155 instructions # 1.87 insn per cycle - 1.637536774 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2400) (512y: 24) (512z: 2152) + 3,253,421,004 cycles # 2.002 GHz + 6,092,602,588 instructions # 1.87 insn per cycle + 1.626390565 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2287) (512y: 24) (512z: 2153) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 0c09829995..613986d3ca 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:18:42 +DATE: 2024-08-08_19:51:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.541434e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.458470e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.707090e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.011234e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.481106e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718662e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.479969 sec +TOTAL : 0.482144 sec INFO: No Floating Point Exceptions have been reported - 2,105,563,726 cycles # 2.968 GHz - 3,011,385,971 instructions # 1.43 insn per cycle - 0.766621397 seconds time elapsed + 2,069,508,701 cycles # 2.943 GHz + 2,973,558,730 instructions # 1.44 insn per cycle + 0.762169669 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.034864e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.093151e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.093151e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.000971e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.057776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.057776e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.237994 sec +TOTAL : 5.337569 sec INFO: No Floating Point Exceptions have been reported - 15,955,011,989 cycles # 3.044 GHz - 44,449,172,061 instructions # 2.79 insn per cycle - 5.243459295 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 550) (avx2: 0) (512y: 0) (512z: 0) + 16,045,528,009 cycles # 3.003 GHz + 44,492,603,616 instructions # 2.77 insn per cycle + 5.344572857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.519058e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.010085e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.010085e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.399267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.870292e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.870292e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.985271 sec +TOTAL : 2.040967 sec INFO: No Floating Point Exceptions have been reported - 6,065,760,073 cycles # 3.048 GHz - 17,080,403,379 instructions # 2.82 insn per cycle - 1.990555127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2882) (avx2: 0) (512y: 0) (512z: 0) + 6,120,195,211 cycles # 2.990 GHz + 17,124,524,771 instructions # 2.80 insn per cycle + 2.047704691 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.223952e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.826129e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.826129e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.231646e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.843621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.843621e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.771606 sec +TOTAL : 1.779814 sec INFO: No Floating Point Exceptions have been reported - 5,023,238,217 cycles # 2.828 GHz - 10,229,394,821 instructions # 2.04 insn per cycle - 1.777232545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3917) (512y: 0) (512z: 0) + 5,080,547,059 cycles # 2.845 GHz + 10,273,415,072 instructions # 2.02 insn per cycle + 1.786648263 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3893) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.342994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.956517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.956517e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.292968e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.928983e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.928983e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.736711 sec +TOTAL : 1.763357 sec INFO: No Floating Point Exceptions have been reported - 4,975,198,217 cycles # 2.858 GHz - 9,999,864,692 instructions # 2.01 insn per cycle - 1.741779350 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3824) (512y: 2) (512z: 0) + 5,036,199,960 cycles # 2.847 GHz + 10,043,698,662 instructions # 1.99 insn per cycle + 1.770080531 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3794) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.869806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.213933e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.213933e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.908901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.261898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.261898e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.237189 sec +TOTAL : 2.233509 sec INFO: No Floating Point Exceptions have been reported - 4,364,643,529 cycles # 1.947 GHz - 8,448,269,915 instructions # 1.94 insn per cycle - 2.242515799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2897) (512y: 4) (512z: 2751) + 4,417,373,079 cycles # 1.973 GHz + 
8,493,082,992 instructions # 1.92 insn per cycle + 2.240143434 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2784) (512y: 4) (512z: 2752) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 7c0eb4ece3..0ca4814912 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:42:27 +DATE: 2024-08-08_20:09:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.723565e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.180139e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.395798e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.662526e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.213312e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.395769e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.483814 sec +TOTAL : 0.479336 sec INFO: No Floating Point Exceptions have been reported - 2,086,603,150 cycles # 2.947 GHz - 2,996,291,742 instructions # 1.44 insn per cycle - 0.766644042 seconds time elapsed + 2,068,711,068 cycles # 2.929 GHz + 2,952,499,501 instructions # 1.43 insn per cycle + 0.763196119 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.588532e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.684127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.684127e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.557673e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.652343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.652343e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.135203 sec +TOTAL : 4.192940 sec INFO: No Floating Point Exceptions have been reported - 12,570,266,885 cycles # 3.037 GHz - 34,622,435,481 instructions # 2.75 insn per cycle - 4.140528435 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) + 12,602,357,038 cycles # 3.002 GHz + 34,631,326,432 instructions # 2.75 insn per cycle + 4.199620510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.480340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.970700e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.970700e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.457087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.945109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.945109e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.999581 sec +TOTAL : 2.017495 sec INFO: No Floating Point Exceptions have been reported - 6,075,791,171 cycles # 3.032 GHz - 14,848,680,325 instructions # 2.44 insn per cycle - 2.005001414 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2993) (avx2: 0) (512y: 0) (512z: 0) + 6,096,552,375 cycles # 3.013 GHz + 14,886,527,681 instructions # 2.44 insn per cycle + 2.024226195 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.412557e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.265368e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.265368e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.320703e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.178361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.178361e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.500086 sec +TOTAL : 1.525431 sec INFO: No Floating Point Exceptions have been reported - 4,313,416,167 cycles # 2.867 GHz - 9,055,140,923 instructions # 2.10 insn per cycle - 1.505373819 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4470) (512y: 0) (512z: 0) + 4,362,864,395 cycles # 2.849 GHz + 9,093,170,699 instructions # 2.08 insn per cycle + 1.532091223 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4446) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.580624e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.471236e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.471236e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.442008e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.347351e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.347351e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.467600 sec +TOTAL : 1.505548 sec INFO: No Floating Point Exceptions have been reported - 4,192,369,884 cycles # 2.848 GHz - 8,664,912,975 instructions # 2.07 insn per cycle - 1.473038348 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4243) (512y: 0) (512z: 0) + 4,283,778,078 cycles # 2.834 GHz + 8,707,570,636 instructions # 2.03 insn per cycle + 1.512346731 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4213) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.697028e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.182026e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.182026e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.480199e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.987074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.987074e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.926016 sec +TOTAL : 2.010348 sec INFO: No Floating Point Exceptions have been reported - 3,834,269,127 cycles # 1.986 GHz - 7,807,487,724 instructions # 2.04 insn per cycle - 1.931378267 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4424) (512y: 0) (512z: 2555) + 3,921,508,341 cycles # 1.945 GHz + 
7,849,973,775 instructions # 2.00 insn per cycle + 2.017051814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4252) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index e900e27558..c66a4f9500 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:42:47 +DATE: 2024-08-08_20:10:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.140621e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.444301e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.723845e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.014498e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.491996e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.727921e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.479825 sec +TOTAL : 0.481358 sec INFO: No Floating Point Exceptions have been reported - 2,082,597,398 cycles # 2.951 GHz - 2,989,924,480 instructions # 1.44 insn per cycle - 0.762031602 seconds time elapsed + 2,037,978,515 cycles # 2.886 GHz + 2,961,010,767 instructions # 1.45 insn per cycle + 0.762837811 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.739102e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.845252e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.845252e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.697323e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.802206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.802206e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.913257 sec +TOTAL : 3.980371 sec INFO: No Floating Point Exceptions have been reported - 11,819,760,720 cycles # 3.017 GHz - 35,092,176,344 instructions # 2.97 insn per cycle - 3.918717089 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) + 11,889,490,017 cycles # 2.983 GHz + 35,106,748,392 instructions # 2.95 insn per cycle + 3.987184887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.569207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.078523e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.078523e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.502653e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.994079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.994079e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.968652 sec +TOTAL : 1.999831 sec INFO: No Floating Point Exceptions have been reported - 5,956,774,806 cycles # 3.019 GHz - 14,470,057,994 instructions # 2.43 insn per cycle - 1.974105545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) + 5,999,305,364 cycles # 2.992 GHz + 14,506,447,484 instructions # 2.42 insn per cycle + 2.006483206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.384560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.251377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.251377e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.608204e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.550220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.550220e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.505860 sec +TOTAL : 1.473214 sec INFO: No Floating Point Exceptions have been reported - 4,150,238,324 cycles # 2.747 GHz - 8,882,886,534 instructions # 2.14 insn per cycle - 1.511296107 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) + 4,213,841,990 cycles # 2.849 GHz + 8,921,034,070 instructions # 2.12 insn per cycle + 1.479975021 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3556) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.444581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.341630e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.341630e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.485226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.400149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.400149e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.495867 sec +TOTAL : 1.496613 sec INFO: No Floating Point Exceptions have been reported - 4,109,698,688 cycles # 2.739 GHz - 8,410,471,582 instructions # 2.05 insn per cycle - 1.501436955 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3314) (512y: 0) (512z: 0) + 4,261,968,497 cycles # 2.836 GHz + 8,450,409,335 instructions # 1.98 insn per cycle + 1.503441367 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.798030e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.297222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.297222e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.731827e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.224198e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.224198e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.893929 sec +TOTAL : 1.924845 sec INFO: No Floating Point Exceptions have been reported - 3,787,882,654 cycles # 1.996 GHz - 7,701,397,342 instructions # 2.03 insn per cycle - 1.899199723 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3440) (512y: 0) (512z: 2107) + 3,821,108,888 cycles # 1.979 GHz + 
7,740,611,821 instructions # 2.03 insn per cycle + 1.931585644 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3268) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 1d2c49ac8a..9e258a42c8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:19:03 +DATE: 2024-08-08_19:52:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.880314e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172219e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274853e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.928215e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.172881e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273641e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.520012 sec +TOTAL : 0.521950 sec INFO: No Floating Point Exceptions have been reported - 2,202,911,416 cycles # 2.926 GHz - 3,165,598,515 instructions # 1.44 insn per cycle - 0.810414735 seconds time elapsed + 2,213,686,839 cycles # 2.946 GHz + 3,178,577,075 instructions # 1.44 insn per cycle + 0.810096796 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.860309e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.907494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.907494e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.841341e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.888035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.888035e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.743741 sec +TOTAL : 5.832684 sec INFO: No Floating Point Exceptions have been reported - 17,418,994,782 cycles # 3.030 GHz - 46,094,189,242 instructions # 2.65 insn per cycle - 5.749395250 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 631) (avx2: 0) (512y: 0) (512z: 0) + 17,545,887,667 cycles # 3.004 GHz + 46,212,560,657 instructions # 2.63 insn per cycle + 5.842093812 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 618) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.315434e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.480321e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.480321e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.270852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.438233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.438233e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.270556 sec +TOTAL : 3.344937 sec INFO: No Floating Point Exceptions have been reported - 9,955,949,401 cycles # 3.040 GHz - 27,592,161,577 instructions # 2.77 insn per cycle - 3.275977650 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2593) (avx2: 0) (512y: 0) (512z: 0) + 10,073,495,315 cycles # 3.004 GHz + 27,713,045,845 instructions # 2.75 insn per cycle + 3.354389607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.259072e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.676561e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.676561e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.229785e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.644944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.644944e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.101748 sec +TOTAL : 2.142226 sec INFO: No Floating Point Exceptions have been reported - 6,038,149,520 cycles # 2.866 GHz - 12,488,773,306 instructions # 2.07 insn per cycle - 2.107527607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2780) (512y: 0) (512z: 0) + 6,138,817,492 cycles # 2.854 GHz + 12,602,197,399 instructions # 2.05 insn per cycle + 2.151581868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2762) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.800248e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.308228e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.308228e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.722165e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.222047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.222047e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.915933 sec +TOTAL : 1.971142 sec INFO: No Floating Point Exceptions have been reported - 5,504,033,479 cycles # 2.866 GHz - 11,922,925,036 instructions # 2.17 insn per cycle - 1.921444713 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 146) (512z: 0) + 5,621,798,133 cycles # 2.839 GHz + 12,035,423,234 instructions # 2.14 insn per cycle + 1.980714349 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2507) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.762973e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.968383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.968383e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.784432e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.992571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.992571e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.894520 sec +TOTAL : 2.909360 sec INFO: No Floating Point Exceptions have been reported - 5,602,974,880 cycles # 1.933 GHz - 8,113,600,641 instructions # 1.45 insn per cycle - 2.900011376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1668) (512y: 126) (512z: 1862) + 5,725,311,509 cycles # 1.962 
GHz + 8,228,178,315 instructions # 1.44 insn per cycle + 2.919447921 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1646) (512y: 126) (512z: 1862) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 29bff9cc44..0491e4ed6d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_20:19:27 +DATE: 2024-08-08_19:52:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.909940e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.182050e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.286288e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.017343e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179179e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.286659e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521684 sec +TOTAL : 0.519682 sec INFO: No Floating Point Exceptions have been reported - 2,226,569,565 cycles # 2.949 GHz - 3,203,343,455 instructions # 1.44 insn per cycle - 0.811937332 seconds time elapsed + 2,213,688,235 cycles # 2.946 GHz + 3,194,056,853 instructions # 1.44 insn per cycle + 0.808260316 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.889937e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.939639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.939639e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.869136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.918050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918050e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.656141 sec +TOTAL : 5.752898 sec INFO: No Floating Point Exceptions have been reported - 16,953,958,373 cycles # 2.995 GHz - 45,121,214,574 instructions # 2.66 insn per cycle - 5.661842751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 582) (avx2: 0) (512y: 0) (512z: 0) + 17,074,104,828 cycles # 2.963 GHz + 45,236,287,915 instructions # 2.65 insn per cycle + 5.764326274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 569) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.455685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.638788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.638788e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.441463e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.626872e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.626872e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.140773 sec +TOTAL : 3.185909 sec INFO: No Floating Point Exceptions have been reported - 9,518,063,498 cycles # 3.026 GHz - 26,244,492,434 instructions # 2.76 insn per cycle - 3.146259863 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2397) (avx2: 0) (512y: 0) (512z: 0) + 9,649,087,118 cycles # 3.020 GHz + 26,365,137,437 instructions # 2.73 insn per cycle + 3.195361891 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2385) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.657640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.979119e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.979119e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.613455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.935335e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.935335e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.357004 sec +TOTAL : 2.413480 sec INFO: No Floating Point Exceptions have been reported - 6,725,665,752 cycles # 2.848 GHz - 14,035,144,203 instructions # 2.09 insn per cycle - 2.362636169 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2902) (512y: 0) (512z: 0) + 6,867,786,043 cycles # 2.835 GHz + 14,147,220,960 instructions # 2.06 insn per cycle + 2.423178008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2884) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.936976e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.299377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.299377e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.856156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.210888e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.210888e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.229512 sec +TOTAL : 2.298392 sec INFO: No Floating Point Exceptions have been reported - 6,396,564,093 cycles # 2.863 GHz - 13,527,050,240 instructions # 2.11 insn per cycle - 2.234879866 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2547) (512y: 302) (512z: 0) + 6,526,789,768 cycles # 2.829 GHz + 13,640,691,375 instructions # 2.09 insn per cycle + 2.307759550 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2523) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.800963e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.008328e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.008328e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.731216e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.937483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.937483e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.864981 sec +TOTAL : 2.951920 sec INFO: No Floating Point Exceptions have been reported - 5,622,466,186 cycles # 1.960 GHz - 9,214,490,312 instructions # 1.64 insn per cycle - 2.870602335 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1453) (512y: 212) (512z: 2059) + 5,713,181,383 cycles # 1.930 
GHz + 9,325,302,677 instructions # 1.63 insn per cycle + 2.961562881 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1431) (512y: 212) (512z: 2059) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 800b42a2f7..f4571b9f6b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:19:52 +DATE: 2024-08-08_19:53:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.784849e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.050628e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.064964e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.927019e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050993e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064681e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.469118 sec +TOTAL : 0.466363 sec INFO: No Floating Point Exceptions have been reported - 2,006,582,876 cycles # 2.941 GHz - 2,882,205,133 instructions # 1.44 insn per cycle - 0.742141272 seconds time elapsed + 2,031,704,885 cycles # 2.932 GHz + 2,907,931,480 instructions # 1.43 insn per cycle + 0.749954927 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
1.108512e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.323620e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.336186e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108955e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322519e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334742e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.603409 sec +TOTAL : 0.601379 sec INFO: No Floating Point Exceptions have been reported - 2,505,659,997 cycles # 2.953 GHz - 3,807,332,090 instructions # 1.52 insn per cycle - 0.909344488 seconds time elapsed + 2,455,141,462 cycles # 2.938 GHz + 3,762,396,340 instructions # 1.53 insn per cycle + 0.893863333 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.487353e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.499535e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.499535e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.481232e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.493616e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.493616e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.610434 sec +TOTAL : 6.623962 sec INFO: No Floating Point Exceptions have been reported - 19,883,753,596 cycles # 3.007 GHz - 59,920,127,931 instructions # 3.01 insn per cycle - 6.614643893 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1212) (avx2: 0) (512y: 0) (512z: 0) + 19,900,544,736 cycles # 3.003 GHz + 59,917,689,995 instructions # 3.01 insn per cycle + 6.628146634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.544552e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.585762e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.585762e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.692821e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.734716e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.734716e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.630161 sec +TOTAL : 3.511000 sec INFO: No Floating Point Exceptions have been reported - 10,622,833,743 cycles # 2.926 GHz - 31,096,080,168 instructions # 2.93 insn per cycle - 3.634294540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5233) (avx2: 0) (512y: 0) (512z: 0) + 10,573,188,323 cycles # 3.009 GHz + 31,088,228,992 instructions # 2.94 insn per cycle + 3.514850116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.447536e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.617462e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.617462e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.311594e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.480158e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.480158e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.756614 sec +TOTAL : 1.779751 sec INFO: No Floating Point Exceptions have been reported - 4,997,708,956 cycles # 2.841 GHz - 11,412,697,579 instructions # 2.28 insn per cycle - 1.763192357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4653) (512y: 0) (512z: 0) + 4,993,361,094 cycles # 2.801 GHz + 11,406,864,540 instructions # 2.28 insn per cycle + 1.783592873 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4635) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.046283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.066541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.066541e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.047569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.068559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.068559e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.588128 sec +TOTAL : 1.583863 sec INFO: No Floating Point Exceptions have been reported - 4,445,949,332 cycles # 2.794 GHz - 10,670,541,390 instructions # 2.40 insn per cycle - 1.592278823 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4395) (512y: 91) (512z: 0) + 4,443,684,141 cycles # 2.800 GHz + 10,665,267,804 instructions # 2.40 insn per cycle + 1.587769074 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4371) (512y: 91) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.399503e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.504160e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.504160e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.461711e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.569260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.569260e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.239033 sec +TOTAL : 2.218169 sec INFO: No Floating Point Exceptions have been reported - 4,137,934,138 cycles # 1.845 GHz - 5,973,596,045 instructions # 1.44 insn per cycle - 2.243189213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1629) (512y: 95) (512z: 3576) + 4,131,467,216 cycles # 1.860 GHz + 5,968,009,062 instructions # 1.44 insn per cycle + 
2.222079730 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1605) (512y: 95) (512z: 3576) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 511a1a868d..a42937504e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:51:08 +DATE: 2024-08-08_20:18:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.590014e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.672492e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.672492e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.687469e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.986061e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.986061e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.500146 sec +TOTAL : 0.493096 sec INFO: No Floating Point Exceptions have been reported - 2,070,151,910 cycles # 2.909 GHz - 3,149,644,102 instructions # 1.52 insn per cycle - 0.770025396 seconds time elapsed + 2,045,059,008 cycles # 2.898 GHz + 3,097,048,003 instructions # 1.51 insn per cycle + 0.762660564 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.719854e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828063e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828063e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.805866e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.910227e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.910227e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.820483 sec +TOTAL : 0.818307 sec INFO: No Floating Point Exceptions have been reported - 3,187,057,338 cycles # 2.976 GHz - 5,022,211,168 instructions # 1.58 insn per cycle - 1.131275813 seconds time elapsed + 3,140,684,454 cycles # 2.950 GHz + 5,061,508,169 instructions # 1.61 insn per cycle + 1.128278285 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.524282e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.536797e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.536797e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.492873e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505187e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505187e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.520579 sec +TOTAL : 6.599351 sec INFO: No Floating Point Exceptions have been reported - 19,909,517,926 cycles # 3.052 GHz - 59,928,033,518 instructions # 3.01 insn per cycle - 6.524962185 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1212) (avx2: 0) (512y: 0) (512z: 0) + 19,933,005,895 cycles # 3.019 GHz + 59,920,307,427 instructions # 3.01 insn per cycle + 6.603770814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.686960e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.730010e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.730010e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.695185e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.737821e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.737821e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.526110 sec +TOTAL : 3.515055 sec INFO: No Floating Point Exceptions have been reported - 10,658,484,150 cycles # 3.020 GHz - 31,144,317,033 instructions # 2.92 insn per cycle - 3.530650848 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5233) (avx2: 0) (512y: 0) (512z: 0) + 10,602,064,942 cycles # 3.013 GHz + 31,134,275,582 instructions # 2.94 insn per cycle + 3.519385575 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.376823e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.547869e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.547869e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.301392e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.470755e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.470755e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.778728 sec +TOTAL : 1.788543 sec INFO: No Floating Point Exceptions have been reported - 5,043,044,785 cycles # 2.829 GHz - 11,463,079,799 instructions # 2.27 insn per cycle - 1.783508677 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4653) (512y: 0) (512z: 0) + 5,028,204,629 cycles # 2.805 GHz + 11,455,559,201 instructions # 2.28 insn per cycle + 1.792981978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4635) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.067257e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088583e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.050919e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.072418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072418e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.565555 sec +TOTAL : 1.585500 sec INFO: No Floating Point Exceptions have been reported - 4,485,484,739 cycles # 2.858 GHz - 10,721,399,115 instructions # 2.39 insn per cycle - 1.570106895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4395) (512y: 91) (512z: 0) + 4,477,945,053 cycles # 2.818 GHz + 10,713,475,732 instructions # 2.39 insn per cycle + 1.589826674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4371) (512y: 91) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.460504e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.573111e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.573111e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.347709e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.453074e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.453074e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.229247 sec +TOTAL : 2.257984 sec INFO: No Floating Point Exceptions have been reported - 4,189,282,452 cycles # 1.876 GHz - 6,014,301,160 instructions # 1.44 insn per cycle - 2.233707766 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1629) (512y: 95) (512z: 3576) + 4,161,878,306 cycles # 1.840 GHz + 6,004,301,884 instructions # 1.44 insn per cycle + 
2.262398569 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1605) (512y: 95) (512z: 3576) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 41d4b9b4cc..6efe0f69f4 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:20:17 +DATE: 2024-08-08_19:53:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.764297e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047002e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061437e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.841089e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040503e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053751e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473798 sec +TOTAL : 0.462910 sec INFO: No Floating Point Exceptions have been reported - 1,951,625,584 cycles # 2.812 GHz - 2,796,879,640 instructions # 1.43 insn per cycle - 0.755787707 seconds time elapsed + 2,010,149,699 cycles # 2.952 GHz + 2,896,854,048 instructions # 1.44 insn per cycle + 0.738052118 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
1.103167e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.314097e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326586e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107639e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.318401e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329750e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.594021 sec +TOTAL : 0.598813 sec INFO: No Floating Point Exceptions have been reported - 2,462,107,240 cycles # 2.964 GHz - 3,775,116,185 instructions # 1.53 insn per cycle - 0.889360809 seconds time elapsed + 2,457,830,026 cycles # 2.951 GHz + 3,751,049,656 instructions # 1.53 insn per cycle + 0.893099521 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.505210e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.517375e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.517375e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.489979e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.502462e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.502462e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.563107 sec +TOTAL : 6.600482 sec INFO: No Floating Point Exceptions have been reported - 19,911,970,043 cycles # 3.032 GHz - 60,133,794,458 instructions # 3.02 insn per cycle - 6.567254097 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) + 19,968,279,527 cycles # 3.024 GHz + 60,133,262,996 instructions # 3.01 insn per cycle + 6.604278291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.806305e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.849646e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.849646e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.723867e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.766716e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.766716e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.430797 sec +TOTAL : 3.487862 sec INFO: No Floating Point Exceptions have been reported - 10,444,425,353 cycles # 3.041 GHz - 30,694,173,313 instructions # 2.94 insn per cycle - 3.435027589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5059) (avx2: 0) (512y: 0) (512z: 0) + 10,481,040,414 cycles # 3.003 GHz + 30,690,087,380 instructions # 2.93 insn per cycle + 3.491637208 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.172198e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.332764e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.332764e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.840811e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.994004e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.994004e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.808464 sec +TOTAL : 1.873663 sec INFO: No Floating Point Exceptions have been reported - 5,137,237,567 cycles # 2.835 GHz - 11,845,239,071 instructions # 2.31 insn per cycle - 1.812718837 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4759) (512y: 0) (512z: 0) + 5,129,466,442 cycles # 2.733 GHz + 11,839,868,923 instructions # 2.31 insn per cycle + 1.877504725 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4741) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.884560e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.007012e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.007012e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.982969e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017062e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.680647 sec +TOTAL : 1.660972 sec INFO: No Floating Point Exceptions have been reported - 4,727,101,217 cycles # 2.807 GHz - 11,170,633,130 instructions # 2.36 insn per cycle - 1.687326475 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4420) (512y: 245) (512z: 0) + 4,713,444,499 cycles # 2.833 GHz + 11,164,953,266 instructions # 2.37 insn per cycle + 1.664821518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4396) (512y: 245) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.212331e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.313838e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.313838e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.457192e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.563104e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.563104e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.296456 sec +TOTAL : 2.218804 sec INFO: No Floating Point Exceptions have been reported - 4,164,500,139 cycles # 1.811 GHz - 6,225,483,611 instructions # 1.49 insn per cycle - 2.302194133 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1525) (512y: 140) (512z: 3678) + 4,152,440,872 cycles # 1.869 GHz + 6,219,243,593 instructions # 1.50 insn per cycle + 
2.222530673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1501) (512y: 140) (512z: 3678) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 31a9a4ac3b..f6f4702d8b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:20:42 +DATE: 2024-08-08_19:53:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.235609e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.904693e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.979120e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.320062e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.967518e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.041410e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.449760 sec +TOTAL : 0.444288 sec INFO: No Floating Point Exceptions have been reported - 1,987,597,220 cycles # 2.945 GHz - 2,797,236,541 instructions # 1.41 insn per cycle - 0.739644538 seconds time elapsed + 1,959,595,734 cycles # 2.963 GHz + 2,777,994,587 instructions # 1.42 insn per cycle + 0.717899732 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 227 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
2.995131e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.899194e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.956559e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.069470e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.919373e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.975617e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.496488 sec +TOTAL : 0.495533 sec INFO: No Floating Point Exceptions have been reported - 2,159,378,990 cycles # 2.950 GHz - 3,070,658,648 instructions # 1.42 insn per cycle - 0.788842582 seconds time elapsed + 2,156,454,732 cycles # 2.941 GHz + 3,086,518,049 instructions # 1.43 insn per cycle + 0.790560540 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.597401e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.610500e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.610500e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.572191e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.585337e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585337e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.330423 sec +TOTAL : 6.388092 sec INFO: No Floating Point Exceptions have been reported - 19,241,703,454 cycles # 3.039 GHz - 59,623,316,700 instructions # 3.10 insn per cycle - 6.335537576 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 972) (avx2: 0) (512y: 0) (512z: 0) + 19,202,614,309 cycles # 3.005 GHz + 59,612,894,743 instructions # 3.10 insn per cycle + 6.392159520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.293498e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.432828e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.432828e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.292655e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.433094e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.433094e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.996199 sec +TOTAL : 1.992839 sec INFO: No Floating Point Exceptions have been reported - 6,021,787,246 cycles # 3.012 GHz - 17,069,232,964 instructions # 2.83 insn per cycle - 2.002865216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5867) (avx2: 0) (512y: 0) (512z: 0) + 6,013,924,550 cycles # 3.013 GHz + 17,061,326,868 instructions # 2.84 insn per cycle + 1.996457314 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5855) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.803557e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.866475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.866475e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.800495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.863232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863232e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.928687 sec +TOTAL : 0.927310 sec INFO: No Floating Point Exceptions have been reported - 2,639,576,993 cycles # 2.831 GHz - 6,193,698,724 instructions # 2.35 insn per cycle - 0.934212919 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5109) (512y: 0) (512z: 0) + 2,629,891,219 cycles # 2.827 GHz + 6,187,073,232 instructions # 2.35 insn per cycle + 0.930846209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5091) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.081356e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.081356e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.976191e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.051455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.051455e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.839723 sec +TOTAL : 0.846370 sec INFO: No Floating Point Exceptions have been reported - 2,406,487,036 cycles # 2.857 GHz - 5,797,568,703 instructions # 2.41 insn per cycle - 0.844635688 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4920) (512y: 36) (512z: 0) + 2,395,634,403 cycles # 2.821 GHz + 5,790,356,055 instructions # 2.42 insn per cycle + 0.849905167 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4896) (512y: 36) (512z: 
0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.514820e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.561506e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.561506e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.518605e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.563959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.563959e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.105903 sec +TOTAL : 1.098394 sec INFO: No Floating Point Exceptions have been reported - 2,082,755,723 cycles # 1.879 GHz - 3,398,336,985 instructions # 1.63 insn per cycle - 1.112213871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2238) (512y: 39) (512z: 3787) + 2,076,123,552 cycles # 1.885 GHz + 3,391,311,970 instructions # 1.63 insn per cycle + 1.102116086 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2214) (512y: 39) (512z: 3787) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index ec2543c7d3..38bf1cd9c0 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:51:33 +DATE: 2024-08-08_20:18:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.709545e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.924352e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.924352e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.003824e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049696e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049696e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.461246 sec +TOTAL : 0.462593 sec INFO: No Floating Point Exceptions have been reported - 1,992,722,705 cycles # 2.958 GHz - 2,887,820,357 instructions # 1.45 insn per cycle - 0.730517540 seconds time elapsed + 1,974,680,886 cycles # 2.933 GHz + 2,925,643,074 instructions # 1.48 insn per cycle + 0.731432096 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.683273e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.454394e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.454394e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.700147e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.536036e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536036e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 -TOTAL : 0.643585 sec +TOTAL : 0.641753 sec INFO: No Floating Point Exceptions have been reported - 2,581,491,261 cycles # 2.948 GHz - 3,956,724,353 instructions # 1.53 insn per cycle - 0.932697919 seconds time elapsed + 2,565,792,794 cycles # 2.944 GHz + 3,938,395,338 instructions # 1.53 insn per cycle + 0.930086671 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.609102e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.622760e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.622760e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.551720e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.564557e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.564557e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.305130 sec +TOTAL : 6.442209 sec INFO: No Floating Point Exceptions have been reported - 19,239,711,426 cycles # 3.050 GHz - 59,623,711,066 instructions # 3.10 insn per cycle - 6.309396622 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 972) (avx2: 0) (512y: 0) (512z: 0) + 19,332,196,535 cycles # 2.999 GHz + 59,617,412,156 instructions # 3.08 insn per cycle + 6.446330406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.217420e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.356633e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.356633e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.229338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.368673e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.368673e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.018951 sec +TOTAL : 2.012620 sec INFO: No Floating Point Exceptions have been reported - 6,043,369,730 cycles # 2.989 GHz - 17,117,928,718 instructions # 2.83 insn per cycle - 2.023188312 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5867) (avx2: 0) (512y: 0) (512z: 0) + 6,036,126,177 cycles # 2.994 GHz + 17,109,389,715 instructions # 2.83 insn per cycle + 2.016763535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5855) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.791038e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.854876e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.854876e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.740859e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.806079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.806079e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.940101 sec +TOTAL : 0.964100 sec INFO: No Floating Point Exceptions have been reported - 2,664,025,945 cycles # 2.826 GHz - 6,230,939,077 instructions # 2.34 insn per cycle - 0.944279490 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5109) (512y: 0) (512z: 0) + 2,661,000,573 cycles # 2.750 GHz + 6,223,355,528 instructions # 2.34 insn per cycle + 0.968303872 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5091) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.978050e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.055457e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.055457e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.800266e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.868707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.868707e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.852829 sec +TOTAL : 0.933168 sec INFO: No Floating Point Exceptions have been reported - 2,424,382,397 cycles # 2.831 GHz - 5,834,398,104 instructions # 2.41 insn per cycle - 0.857053275 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4920) (512y: 36) (512z: 0) + 2,423,820,124 cycles # 2.587 GHz + 5,827,757,074 instructions # 2.40 insn per cycle + 0.937581508 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4896) (512y: 36) (512z: 
0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.506314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.552051e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.552051e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.427750e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.470264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.470264e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.115436 sec +TOTAL : 1.172250 sec INFO: No Floating Point Exceptions have been reported - 2,104,767,379 cycles # 1.881 GHz - 3,439,635,841 instructions # 1.63 insn per cycle - 1.119713640 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2238) (512y: 39) (512z: 3787) + 2,098,127,039 cycles # 1.785 GHz + 3,432,639,908 instructions # 1.64 insn per cycle + 1.176441537 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2214) (512y: 39) (512z: 3787) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index e3957db8ae..0ba4eb9609 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:21:02 +DATE: 2024-08-08_19:54:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.238812e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.928064e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.007026e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.278251e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.942254e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.021816e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.448531 sec +TOTAL : 0.446242 sec INFO: No Floating Point Exceptions have been reported - 1,951,411,968 cycles # 2.941 GHz - 2,766,558,002 instructions # 1.42 insn per cycle - 0.764509940 seconds time elapsed + 1,972,500,118 cycles # 2.943 GHz + 2,795,935,059 instructions # 1.42 insn per cycle + 0.726942838 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 221 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
3.030678e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.944358e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.002516e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.087674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.947916e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.002420e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.503686 sec +TOTAL : 0.494089 sec INFO: No Floating Point Exceptions have been reported - 2,094,025,202 cycles # 2.866 GHz - 2,990,436,652 instructions # 1.43 insn per cycle - 0.789461074 seconds time elapsed + 2,134,934,271 cycles # 2.953 GHz + 3,048,352,562 instructions # 1.43 insn per cycle + 0.779729616 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.571820e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.584887e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.584887e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.547958e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.560826e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.560826e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.392812 sec +TOTAL : 6.448288 sec INFO: No Floating Point Exceptions have been reported - 19,438,042,046 cycles # 3.039 GHz - 59,356,933,962 instructions # 3.05 insn per cycle - 6.398066818 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1040) (avx2: 0) (512y: 0) (512z: 0) + 19,391,308,595 cycles # 3.006 GHz + 59,353,270,013 instructions # 3.06 insn per cycle + 6.452193679 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.739600e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.892454e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.892454e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.669188e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.820622e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.820622e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.895118 sec +TOTAL : 1.907127 sec INFO: No Floating Point Exceptions have been reported - 5,763,819,561 cycles # 3.036 GHz - 16,856,373,051 instructions # 2.92 insn per cycle - 1.901175393 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5622) (avx2: 0) (512y: 0) (512z: 0) + 5,746,722,793 cycles # 3.009 GHz + 16,850,100,573 instructions # 2.93 insn per cycle + 1.910695363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.580567e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.629147e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.629147e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.563334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.611066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.611066e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.057144 sec +TOTAL : 1.065485 sec INFO: No Floating Point Exceptions have been reported - 3,018,223,480 cycles # 2.845 GHz - 6,854,687,139 instructions # 2.27 insn per cycle - 1.063400892 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5739) (512y: 0) (512z: 0) + 3,007,335,634 cycles # 2.814 GHz + 6,847,154,679 instructions # 2.28 insn per cycle + 1.069270257 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5721) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.696812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.752365e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.752365e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.689887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.745378e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.745378e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.986126 sec +TOTAL : 0.986999 sec INFO: No Floating Point Exceptions have been reported - 2,807,740,600 cycles # 2.837 GHz - 6,444,005,515 instructions # 2.30 insn per cycle - 0.993303282 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5521) (512y: 22) (512z: 0) + 2,801,128,869 cycles # 2.830 GHz + 6,436,964,591 instructions # 2.30 insn per cycle + 0.990525270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5497) (512y: 22) (512z: 
0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.410526e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.449814e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.449814e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.390544e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.428498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.428498e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.184868 sec +TOTAL : 1.197863 sec INFO: No Floating Point Exceptions have been reported - 2,258,261,444 cycles # 1.902 GHz - 3,761,765,213 instructions # 1.67 insn per cycle - 1.189729512 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2469) (512y: 29) (512z: 4082) + 2,249,856,205 cycles # 1.874 GHz + 3,755,019,516 instructions # 1.67 insn per cycle + 1.201521180 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2445) (512y: 29) (512z: 4082) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 23d2b99348..b56fab2636 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:21:23 +DATE: 2024-08-08_19:54:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.717161e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040648e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.055523e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.873225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048994e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.062769e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.465962 sec +TOTAL : 0.468393 sec INFO: No Floating Point Exceptions have been reported - 2,036,521,358 cycles # 2.930 GHz - 2,917,582,884 instructions # 1.43 insn per cycle - 0.765122552 seconds time elapsed + 2,013,463,276 cycles # 2.926 GHz + 2,843,704,920 instructions # 1.41 insn per cycle + 0.746969806 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
1.106058e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316620e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329020e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.105683e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317981e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329407e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.602784 sec +TOTAL : 0.602858 sec INFO: No Floating Point Exceptions have been reported - 2,487,016,630 cycles # 2.958 GHz - 3,810,655,447 instructions # 1.53 insn per cycle - 0.899326894 seconds time elapsed + 2,481,502,789 cycles # 2.952 GHz + 3,777,860,843 instructions # 1.52 insn per cycle + 0.899194246 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.465065e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.477007e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.477007e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.428536e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.440162e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.440162e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.670736 sec +TOTAL : 6.766520 sec INFO: No Floating Point Exceptions have been reported - 20,214,364,605 cycles # 3.030 GHz - 60,955,289,572 instructions # 3.02 insn per cycle - 6.676699465 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1233) (avx2: 0) (512y: 0) (512z: 0) + 20,196,006,274 cycles # 2.983 GHz + 60,947,190,146 instructions # 3.02 insn per cycle + 6.770695543 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.688585e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.732660e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.732660e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.786932e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.830680e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.830680e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.517021 sec +TOTAL : 3.442084 sec INFO: No Floating Point Exceptions have been reported - 10,461,894,641 cycles # 2.972 GHz - 30,831,983,788 instructions # 2.95 insn per cycle - 3.523187933 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5362) (avx2: 0) (512y: 0) (512z: 0) + 10,443,979,206 cycles # 3.032 GHz + 30,824,270,405 instructions # 2.95 insn per cycle + 3.445851321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5350) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.571199e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.744609e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.744609e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.470779e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.644870e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.644870e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.735065 sec +TOTAL : 1.749981 sec INFO: No Floating Point Exceptions have been reported - 4,952,568,801 cycles # 2.850 GHz - 11,366,247,235 instructions # 2.30 insn per cycle - 1.741580355 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4782) (512y: 0) (512z: 0) + 4,950,819,939 cycles # 2.824 GHz + 11,360,637,335 instructions # 2.29 insn per cycle + 1.753761622 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4764) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.082064e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103867e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.103867e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.072349e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094125e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.540104 sec +TOTAL : 1.547382 sec INFO: No Floating Point Exceptions have been reported - 4,393,896,240 cycles # 2.853 GHz - 10,616,997,940 instructions # 2.42 insn per cycle - 1.544265101 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4515) (512y: 83) (512z: 0) + 4,393,258,157 cycles # 2.833 GHz + 10,610,345,317 instructions # 2.42 insn per cycle + 1.551099869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4491) (512y: 83) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.307174e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.408952e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.408952e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.179185e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.278821e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.278821e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.267048 sec +TOTAL : 2.303939 sec INFO: No Floating Point Exceptions have been reported - 4,251,639,994 cycles # 1.873 GHz - 6,173,180,709 instructions # 1.45 insn per cycle - 2.272859583 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2141) (512y: 117) (512z: 3652) + 4,243,069,453 cycles # 1.839 GHz + 6,166,943,639 instructions # 1.45 insn per cycle + 
2.307918272 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2117) (512y: 117) (512z: 3652) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 8e7f8fcace..02b75df755 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-06-28_20:21:48 +DATE: 2024-08-08_19:54:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.687531e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040941e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.054772e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.792781e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.038946e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052598e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.466409 sec +TOTAL : 0.468036 sec INFO: No Floating Point Exceptions have been reported - 2,036,972,851 cycles # 2.951 GHz - 2,896,753,142 instructions # 1.42 insn per cycle - 0.912477306 seconds time elapsed + 1,985,001,604 cycles # 2.907 GHz + 2,766,137,748 instructions # 1.39 insn per cycle + 0.741175013 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
1.101045e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.308119e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.319880e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.100333e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.310665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321752e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.598149 sec +TOTAL : 0.598767 sec INFO: No Floating Point Exceptions have been reported - 2,466,725,732 cycles # 2.955 GHz - 3,804,698,508 instructions # 1.54 insn per cycle - 0.893566745 seconds time elapsed + 2,453,028,425 cycles # 2.950 GHz + 3,661,775,107 instructions # 1.49 insn per cycle + 0.892773102 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.451478e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.463107e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.463107e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.443765e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.455326e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.455326e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.707286 sec +TOTAL : 6.725300 sec INFO: No Floating Point Exceptions have been reported - 20,287,610,611 cycles # 3.023 GHz - 61,178,057,539 instructions # 3.02 insn per cycle - 6.713126605 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1285) (avx2: 0) (512y: 0) (512z: 0) + 20,276,202,254 cycles # 3.014 GHz + 61,176,047,563 instructions # 3.02 insn per cycle + 6.729394202 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.862832e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.908536e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.908536e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.782126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.826623e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.826623e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.391985 sec +TOTAL : 3.445491 sec INFO: No Floating Point Exceptions have been reported - 10,320,455,746 cycles # 3.040 GHz - 30,541,990,731 instructions # 2.96 insn per cycle - 3.398241448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5166) (avx2: 0) (512y: 0) (512z: 0) + 10,362,676,163 cycles # 3.005 GHz + 30,536,337,790 instructions # 2.95 insn per cycle + 3.449270850 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5154) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.179015e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.338205e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.338205e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.061590e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.221412e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.221412e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.807157 sec +TOTAL : 1.828348 sec INFO: No Floating Point Exceptions have been reported - 5,150,435,949 cycles # 2.844 GHz - 11,880,396,494 instructions # 2.31 insn per cycle - 1.815550042 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4893) (512y: 0) (512z: 0) + 5,140,078,208 cycles # 2.807 GHz + 11,874,984,280 instructions # 2.31 insn per cycle + 1.832218653 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4875) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.012527e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.031498e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.031498e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.004120e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.023004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.023004e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.641252 sec +TOTAL : 1.651331 sec INFO: No Floating Point Exceptions have been reported - 4,676,385,612 cycles # 2.845 GHz - 11,173,497,252 instructions # 2.39 insn per cycle - 1.647761659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4520) (512y: 238) (512z: 0) + 4,668,851,118 cycles # 2.822 GHz + 11,168,266,795 instructions # 2.39 insn per cycle + 1.655171295 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4496) (512y: 238) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.320732e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.423530e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.423530e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.200167e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.298361e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.298361e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.262199 sec +TOTAL : 2.297641 sec INFO: No Floating Point Exceptions have been reported - 4,261,802,804 cycles # 1.881 GHz - 6,412,763,401 instructions # 1.50 insn per cycle - 2.269882721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2037) (512y: 163) (512z: 3730) + 4,253,384,705 cycles # 1.849 GHz + 6,407,420,579 instructions # 1.51 insn per cycle + 
2.301529661 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2013) (512y: 163) (512z: 3730) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 606ff35de6..ab0ea6da4a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:22:13 +DATE: 2024-08-08_19:55:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.459828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.485766e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.487928e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.488153e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.514881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516998e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.524789 sec +TOTAL : 0.525204 sec INFO: No Floating Point Exceptions have been reported - 2,242,199,367 cycles # 2.960 GHz - 3,521,289,668 instructions # 1.57 insn per cycle - 1.006468673 seconds time elapsed + 2,218,473,016 cycles # 2.933 GHz + 3,463,122,045 instructions # 1.56 insn per cycle + 0.815780769 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
4.137707e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166699e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167947e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.132223e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161610e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162761e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.031945 sec +TOTAL : 3.033638 sec INFO: No Floating Point Exceptions have been reported - 9,954,167,339 cycles # 3.019 GHz - 21,948,236,097 instructions # 2.20 insn per cycle - 3.356435303 seconds time elapsed + 9,809,726,664 cycles # 2.987 GHz + 20,834,555,403 instructions # 2.12 insn per cycle + 3.343721812 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.942202e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.943197e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.943197e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933106e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934097e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934097e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.455078 sec +TOTAL : 8.490765 sec INFO: No Floating Point Exceptions have been reported - 25,685,427,078 cycles # 3.037 GHz - 78,966,363,864 instructions # 3.07 insn per cycle - 8.461150899 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,657,464,355 cycles # 3.021 GHz + 78,956,678,283 instructions # 3.08 insn per cycle + 8.494928864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.637691e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.640965e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.640965e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.556899e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560135e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560135e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.519192 sec +TOTAL : 4.617381 sec INFO: No Floating Point Exceptions have been reported - 13,125,941,934 cycles # 2.903 GHz - 39,566,682,586 instructions # 3.01 insn per cycle - 4.525791931 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13211) (avx2: 0) (512y: 0) (512z: 0) + 13,096,002,004 cycles # 2.834 GHz + 39,560,686,282 instructions # 3.02 insn per cycle + 4.621306822 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.347155e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.363965e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.363965e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.312969e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.330861e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.330861e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.975226 sec +TOTAL : 1.979952 sec INFO: No Floating Point Exceptions have been reported - 5,639,645,611 cycles # 2.852 GHz - 13,831,852,045 instructions # 2.45 insn per cycle - 1.981608604 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11548) (512y: 0) (512z: 0) + 5,592,710,730 cycles # 2.820 GHz + 13,825,002,673 instructions # 2.47 insn per cycle + 1.983978333 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11530) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.876912e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.896755e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.896755e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.448686e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.470931e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.470931e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.858025 sec +TOTAL : 1.742543 sec INFO: No Floating Point Exceptions have been reported - 4,952,320,771 cycles # 2.661 GHz - 12,513,090,040 instructions # 2.53 insn per cycle - 1.864384055 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10473) (512y: 88) (512z: 0) + 4,950,283,084 cycles # 2.836 GHz + 12,507,380,266 instructions # 2.53 insn per cycle + 1.746261350 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10449) (512y: 
88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.393007e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.406397e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.406397e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.208746e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.222007e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.222007e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.229067 sec +TOTAL : 2.282175 sec INFO: No Floating Point Exceptions have been reported - 4,149,880,862 cycles # 1.859 GHz - 6,398,246,742 instructions # 1.54 insn per cycle - 2.235781935 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1998) (512y: 102) (512z: 9391) + 4,146,883,314 cycles # 1.815 GHz + 6,393,760,552 instructions # 1.54 insn per 
cycle + 2.285979679 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1974) (512y: 102) (512z: 9391) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 3e1ba5193c..9aa087c04f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:52:20 +DATE: 2024-08-08_20:19:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.138565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.475918e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.475918e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.112227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443687e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443687e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.517848 sec +TOTAL : 0.518381 sec INFO: No Floating Point Exceptions have been reported - 2,215,015,851 cycles # 2.973 GHz - 3,497,258,009 instructions # 1.58 insn per cycle - 0.806722822 seconds time elapsed + 2,176,799,915 cycles # 2.911 GHz + 3,495,470,615 instructions # 1.61 insn per cycle + 0.808139854 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.650886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.124920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.124920e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.648774e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.128576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.128576e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.300589 sec +TOTAL : 3.310822 sec INFO: No Floating Point Exceptions have been reported - 10,784,891,679 cycles # 3.015 GHz - 23,660,211,568 instructions # 2.19 insn per cycle - 3.633362684 seconds time elapsed + 10,679,469,031 cycles # 2.985 GHz + 23,830,814,413 instructions # 2.23 insn per cycle + 3.633830469 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.954605e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.955629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.955629e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.923317e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.924229e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.924229e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.405087 sec +TOTAL : 8.538018 sec INFO: No Floating Point Exceptions have been reported - 25,704,111,857 cycles # 3.057 GHz - 78,968,382,574 instructions # 3.07 insn per cycle - 8.409623563 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,699,355,856 cycles # 3.009 GHz + 78,962,606,878 instructions # 3.07 insn per cycle + 8.542523167 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.650719e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.654171e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.654171e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.605150e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.608587e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608587e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.506089 sec +TOTAL : 4.559554 sec INFO: No Floating Point Exceptions have been reported - 13,135,898,885 cycles # 2.913 GHz - 39,579,018,476 instructions # 3.01 insn per cycle - 4.510608241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13211) (avx2: 0) (512y: 0) (512z: 0) + 13,117,342,563 cycles # 2.875 GHz + 39,574,473,831 instructions # 3.02 insn per cycle + 4.563915289 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.359873e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.377612e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.377612e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.187581e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.204828e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.204828e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.975786 sec +TOTAL : 2.014036 sec INFO: No Floating Point Exceptions have been reported - 5,657,211,223 cycles # 2.858 GHz - 13,841,168,100 instructions # 2.45 insn per cycle - 1.980284784 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11548) (512y: 0) (512z: 0) + 5,605,896,422 cycles # 2.779 GHz + 13,833,979,214 instructions # 2.47 insn per cycle + 2.018562637 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11530) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.530978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.555255e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.555255e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.243444e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.265975e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.265975e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.734840 sec +TOTAL : 1.784658 sec INFO: No Floating Point Exceptions have been reported - 4,965,921,813 cycles # 2.857 GHz - 12,523,097,371 instructions # 2.52 insn per cycle - 1.739276354 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10473) (512y: 88) (512z: 0) + 4,964,309,016 cycles # 2.776 GHz + 12,516,237,329 instructions # 2.52 insn per cycle + 1.788990266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10449) (512y: 
88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.449141e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.463773e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.463773e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.077629e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.090790e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.090790e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.215526 sec +TOTAL : 2.328055 sec INFO: No Floating Point Exceptions have been reported - 4,165,473,123 cycles # 1.877 GHz - 6,409,134,884 instructions # 1.54 insn per cycle - 2.220135516 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1998) (512y: 102) (512z: 9391) + 4,162,316,275 cycles # 1.785 GHz + 6,401,996,872 instructions # 1.54 insn per 
cycle + 2.332653341 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1974) (512y: 102) (512z: 9391) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 36e35a9f65..ff7f772058 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_21:01:57 +DATE: 2024-08-08_20:29:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.474154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.501832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.503997e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.507693e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.534445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536631e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.511354 sec +TOTAL : 0.514407 sec INFO: No Floating Point Exceptions have been reported - 2,188,138,832 cycles # 2.950 GHz - 3,455,450,866 instructions # 1.58 insn per cycle - 0.803741465 seconds time elapsed + 2,174,406,271 cycles # 2.930 GHz + 3,461,893,969 instructions # 1.59 insn per cycle + 0.803766234 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 4.148640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.179495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.180800e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.147428e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.177075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178326e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.129383 sec +TOTAL : 3.120976 sec INFO: No Floating Point Exceptions have been reported - 10,087,636,146 cycles # 2.985 GHz - 20,676,039,095 instructions # 2.05 insn per cycle - 3.437978751 seconds time elapsed + 10,019,214,394 cycles # 2.972 GHz + 21,025,350,474 instructions # 2.10 insn per cycle + 3.430265997 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.953051e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.954062e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.954062e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.913744e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914711e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914711e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.408463 sec +TOTAL : 8.577743 sec INFO: No Floating Point Exceptions have been reported - 25,683,043,283 cycles # 3.053 GHz - 78,961,949,285 instructions # 3.07 insn per cycle - 8.412560493 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,670,651,990 cycles # 2.992 GHz + 78,955,406,875 instructions # 3.08 insn per cycle + 8.581763598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.538179e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.541408e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.541408e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.605176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.608431e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608431e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.646642 sec +TOTAL : 4.556655 sec INFO: No Floating Point Exceptions have been reported - 13,132,428,466 cycles # 2.824 GHz - 39,566,359,526 instructions # 3.01 insn per cycle - 4.650849397 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13211) (avx2: 0) (512y: 0) (512z: 0) + 13,109,013,329 cycles # 2.875 GHz + 39,558,662,551 instructions # 3.02 insn per cycle + 4.560750410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.368365e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.385826e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.385826e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.281071e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.297965e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.297965e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.971160 sec +TOTAL : 1.988611 sec INFO: No Floating Point Exceptions have been reported - 5,647,787,452 cycles # 2.861 GHz - 13,829,507,630 instructions # 2.45 insn per cycle - 1.975214929 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11548) (512y: 0) (512z: 0) + 5,595,768,969 cycles # 2.809 GHz + 13,822,292,745 instructions # 2.47 insn per cycle + 1.992702302 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11530) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.502828e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.525337e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.525337e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.896901e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.917572e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.917572e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.736925 sec +TOTAL : 1.851324 sec INFO: No Floating Point Exceptions have been reported - 4,952,829,535 cycles # 2.846 GHz - 12,510,405,299 instructions # 2.53 insn per cycle - 1.741019325 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10473) (512y: 88) (512z: 0) + 4,949,173,347 cycles # 2.669 GHz + 12,503,287,563 instructions # 2.53 insn per cycle + 1.855415164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10449) (512y: 
88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.365853e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.379911e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.379911e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.307417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.320405e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.320405e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.237795 sec +TOTAL : 2.252212 sec INFO: No Floating Point Exceptions have been reported - 4,156,890,577 cycles # 1.855 GHz - 6,396,158,206 instructions # 1.54 insn per cycle - 2.242043953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1998) (512y: 102) (512z: 9391) + 4,148,121,362 cycles # 1.839 GHz + 6,388,958,727 instructions # 1.54 insn per 
cycle + 2.256422988 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1974) (512y: 102) (512z: 9391) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 207f46b632..8c55b22907 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:59:12 +DATE: 2024-08-08_20:26:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.461876e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.490862e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493015e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.458961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488049e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.509960 sec +TOTAL : 0.514464 sec INFO: No Floating Point Exceptions have been reported - 2,189,426,255 cycles # 2.953 GHz - 3,479,572,247 instructions # 1.59 insn per cycle - 0.800831207 seconds time elapsed + 2,130,639,833 cycles # 2.860 GHz + 3,343,542,179 instructions # 1.57 insn per cycle + 0.805221680 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 4.145425e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.176585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.177885e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.127051e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.156110e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.157363e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.069887 sec +TOTAL : 3.075386 sec INFO: No Floating Point Exceptions have been reported - 9,983,044,404 cycles # 3.007 GHz - 22,599,909,333 instructions # 2.26 insn per cycle - 3.378270496 seconds time elapsed + 9,595,195,883 cycles # 2.879 GHz + 21,169,008,885 instructions # 2.21 insn per cycle + 3.388723748 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.948901e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.949866e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.949866e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.853624e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.854505e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.854505e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.424966 sec +TOTAL : 8.854273 sec INFO: No Floating Point Exceptions have been reported - 25,683,040,628 cycles # 3.047 GHz - 78,961,831,753 instructions # 3.07 insn per cycle - 8.429259681 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,673,092,183 cycles # 2.899 GHz + 78,956,489,516 instructions # 3.08 insn per cycle + 8.858619563 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.653114e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.656450e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656450e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.555877e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.559175e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.559175e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.498779 sec +TOTAL : 4.618236 sec INFO: No Floating Point Exceptions have been reported - 13,124,985,281 cycles # 2.916 GHz - 39,566,085,965 instructions # 3.01 insn per cycle - 4.502971209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13211) (avx2: 0) (512y: 0) (512z: 0) + 13,105,607,424 cycles # 2.836 GHz + 39,562,262,758 instructions # 3.02 insn per cycle + 4.622614183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.369414e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.386730e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.386730e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.117944e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.134423e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.134423e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.969220 sec +TOTAL : 2.026386 sec INFO: No Floating Point Exceptions have been reported - 5,641,945,348 cycles # 2.860 GHz - 13,831,299,800 instructions # 2.45 insn per cycle - 1.973388441 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11548) (512y: 0) (512z: 0) + 5,589,116,983 cycles # 2.754 GHz + 13,823,429,494 instructions # 2.47 insn per cycle + 2.030436364 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11530) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.516955e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.539518e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.539518e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.385930e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.407557e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.407557e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.732837 sec +TOTAL : 1.753538 sec INFO: No Floating Point Exceptions have been reported - 4,949,423,150 cycles # 2.851 GHz - 12,512,060,098 instructions # 2.53 insn per cycle - 1.737031693 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10473) (512y: 88) (512z: 0) + 4,940,731,112 cycles # 2.812 GHz + 12,505,003,217 instructions # 2.53 insn per cycle + 1.757654269 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10449) (512y: 
88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.477478e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.491068e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.491068e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.329600e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.342625e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.342625e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.202805 sec +TOTAL : 2.243900 sec INFO: No Floating Point Exceptions have been reported - 4,149,646,633 cycles # 1.881 GHz - 6,397,797,180 instructions # 1.54 insn per cycle - 2.207008648 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1998) (512y: 102) (512z: 9391) + 4,145,687,524 cycles # 1.845 GHz + 6,390,893,367 instructions # 1.54 insn per 
cycle + 2.248144727 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1974) (512y: 102) (512z: 9391) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 151bf82020..28e1d95034 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:56:33 +DATE: 2024-08-08_20:24:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,15 +50,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.203392e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508627e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.510845e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.229613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.520921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523094e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.513307 sec +TOTAL : 0.513514 sec INFO: No Floating Point Exceptions have been reported - 2,189,978,735 cycles # 2.953 GHz - 3,500,307,012 instructions # 1.60 insn per cycle - 0.803039724 seconds time elapsed + 2,168,346,936 cycles # 2.927 GHz + 3,433,459,385 instructions # 1.58 insn per cycle + 0.802152079 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -70,18 +70,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.744166e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.178157e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.179382e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.733483e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.157890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.159150e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.194253 sec +TOTAL : 3.199522 sec INFO: No Floating Point Exceptions have been reported - 10,437,486,034 cycles # 3.029 GHz - 24,018,289,609 instructions # 2.30 insn per cycle - 3.502431258 seconds time elapsed + 10,294,194,017 cycles # 2.982 GHz + 21,521,466,269 instructions # 2.09 insn per cycle + 3.508277099 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -98,20 +100,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.949609e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.950599e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.950599e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.923954e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.924900e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.924900e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.422477 sec +TOTAL : 8.530428 sec INFO: No Floating Point Exceptions have been reported - 25,685,602,378 cycles # 3.049 GHz - 78,962,442,414 instructions # 3.07 insn per cycle - 8.426740936 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4856) (avx2: 0) (512y: 0) (512z: 0) + 25,661,796,778 cycles # 3.007 GHz + 78,954,509,974 instructions # 3.08 insn per cycle + 8.534417643 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4843) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -126,20 +129,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.636823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.640239e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.640239e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.615782e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.619130e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.619130e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.518815 sec +TOTAL : 4.541944 sec INFO: No Floating Point Exceptions have been reported - 13,125,924,467 cycles # 2.903 GHz - 39,566,231,833 instructions # 3.01 insn per cycle - 4.523062408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13211) (avx2: 0) (512y: 0) (512z: 0) + 13,126,189,517 cycles # 2.888 GHz + 39,559,744,202 instructions # 3.01 insn per cycle + 4.546027002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -154,20 +158,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.318809e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.336114e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.336114e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.299850e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.317113e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.317113e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.981355 sec +TOTAL : 1.982404 sec INFO: No Floating Point Exceptions have been reported - 5,639,339,757 cycles # 2.841 GHz - 13,830,515,441 instructions # 2.45 insn per cycle - 1.985600666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11548) (512y: 0) (512z: 0) + 5,586,639,772 cycles # 2.813 GHz + 13,823,166,385 instructions # 2.47 insn per cycle + 1.986590396 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11530) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -182,20 +187,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.503636e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.525847e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.525847e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.384353e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.406906e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.406906e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.735480 sec +TOTAL : 1.753945 sec INFO: No Floating Point Exceptions have been reported - 4,951,242,002 cycles # 2.848 GHz - 12,513,124,213 instructions # 2.53 insn per cycle - 1.739580059 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10473) (512y: 88) (512z: 0) + 4,942,572,018 cycles # 2.813 GHz + 12,504,933,165 instructions # 2.53 insn per cycle + 1.758084275 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10449) (512y: 
88) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -210,20 +216,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.313441e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.326736e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.326736e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.317460e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.330821e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.330821e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.252460 sec +TOTAL : 2.247518 sec INFO: No Floating Point Exceptions have been reported - 4,155,241,568 cycles # 1.842 GHz - 6,398,190,319 instructions # 1.54 insn per cycle - 2.256689169 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1998) (512y: 102) (512z: 9391) + 4,146,774,770 cycles # 1.843 GHz + 6,391,452,350 instructions # 1.54 insn per 
cycle + 2.251569316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1974) (512y: 102) (512z: 9391) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index cbb644cf1f..ef490ee27f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:22:45 +DATE: 2024-08-08_19:55:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.472054e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.498222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.500508e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.468386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495424e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497730e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523112 sec +TOTAL : 0.528153 sec INFO: No Floating Point Exceptions have been reported - 2,236,042,620 cycles # 2.962 GHz - 3,533,085,294 instructions # 1.58 insn per cycle - 0.932117194 seconds time elapsed + 2,223,041,093 cycles # 2.885 GHz + 3,357,279,580 instructions # 1.51 insn per cycle + 0.829273079 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
4.141957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.170783e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.171976e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.133736e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.163273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164433e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.018899 sec +TOTAL : 3.026404 sec INFO: No Floating Point Exceptions have been reported - 9,915,480,311 cycles # 3.029 GHz - 22,358,141,614 instructions # 2.25 insn per cycle - 3.328323631 seconds time elapsed + 9,787,087,404 cycles # 2.984 GHz + 20,868,236,699 instructions # 2.13 insn per cycle + 3.335921488 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.935763e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.936655e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.936655e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.930451e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.931397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.931397e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.482533 sec +TOTAL : 8.501967 sec INFO: No Floating Point Exceptions have been reported - 25,605,418,102 cycles # 3.018 GHz - 78,706,969,538 instructions # 3.07 insn per cycle - 8.488935811 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4205) (avx2: 0) (512y: 0) (512z: 0) + 25,635,869,243 cycles # 3.014 GHz + 78,699,985,409 instructions # 3.07 insn per cycle + 8.506017009 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.641012e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.644459e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.644459e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.635004e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.638325e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638325e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.515146 sec +TOTAL : 4.518323 sec INFO: No Floating Point Exceptions have been reported - 13,056,661,799 cycles # 2.891 GHz - 39,457,635,850 instructions # 3.02 insn per cycle - 4.521450799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12985) (avx2: 0) (512y: 0) (512z: 0) + 13,043,304,130 cycles # 2.885 GHz + 39,451,387,281 instructions # 3.02 insn per cycle + 4.522544486 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12973) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.270313e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.286680e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.286680e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.103214e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.119837e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.119837e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.992198 sec +TOTAL : 2.030819 sec INFO: No Floating Point Exceptions have been reported - 5,675,658,087 cycles # 2.844 GHz - 13,917,752,986 instructions # 2.45 insn per cycle - 1.998576575 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11610) (512y: 0) (512z: 0) + 5,706,370,481 cycles # 2.806 GHz + 13,911,650,507 instructions # 2.44 insn per cycle + 2.034636014 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11592) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.387023e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.408110e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.408110e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.209342e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.231718e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.231718e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.756363 sec +TOTAL : 1.787809 sec INFO: No Floating Point Exceptions have been reported - 4,993,263,033 cycles # 2.837 GHz - 12,609,677,124 instructions # 2.53 insn per cycle - 1.762986317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10457) (512y: 240) (512z: 0) + 4,991,279,132 cycles # 2.786 GHz + 12,604,125,286 instructions # 2.53 insn per cycle + 1.792337833 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10433) 
(512y: 240) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.319140e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.332490e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.332490e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.276351e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.289893e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.289893e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.250531 sec +TOTAL : 2.260957 sec INFO: No Floating Point Exceptions have been reported - 4,162,515,901 cycles # 1.847 GHz - 6,507,204,315 instructions # 1.56 insn per cycle - 2.256696447 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1774) (512y: 194) (512z: 9387) + 4,149,253,590 cycles # 1.833 GHz + 6,500,352,718 instructions # 1.57 insn 
per cycle + 2.264815173 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1750) (512y: 194) (512z: 9387) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 6a2d99b3cd..bbaea3caef 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:43:06 +DATE: 2024-08-08_20:10:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.253055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.278247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.280335e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.246678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.268467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.270191e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536312 sec +TOTAL : 0.534085 sec INFO: No Floating Point Exceptions have been reported - 2,250,274,110 cycles # 2.945 GHz - 3,555,189,986 instructions # 1.58 insn per cycle - 0.822638069 seconds time elapsed + 2,285,518,624 cycles # 2.953 GHz + 3,580,561,444 instructions # 1.57 insn per cycle + 0.832119310 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
3.760808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.789087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.790247e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.761384e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784291e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785252e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.306329 sec +TOTAL : 3.301764 sec INFO: No Floating Point Exceptions have been reported - 10,724,834,853 cycles # 3.010 GHz - 24,802,280,834 instructions # 2.31 insn per cycle - 3.619173157 seconds time elapsed + 10,582,525,253 cycles # 2.981 GHz + 22,709,986,647 instructions # 2.15 insn per cycle + 3.609006709 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.381772e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.382276e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.382276e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.342825e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.343311e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.343311e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.438618 sec +TOTAL : 37.771526 sec INFO: No Floating Point Exceptions have been reported - 113,491,355,747 cycles # 3.031 GHz - 144,836,012,190 instructions # 1.28 insn per cycle - 37.442851666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21407) (avx2: 0) (512y: 0) (512z: 0) + 112,991,669,428 cycles # 2.992 GHz + 144,862,430,473 instructions # 1.28 insn per cycle + 37.775737563 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21361) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.196978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.199549e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.199549e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.180115e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.182680e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.182680e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.139131 sec +TOTAL : 5.162984 sec INFO: No Floating Point Exceptions have been reported - 14,751,408,883 cycles # 2.869 GHz - 37,659,055,647 instructions # 2.55 insn per cycle - 5.143466599 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68265) (avx2: 0) (512y: 0) (512z: 0) + 14,747,517,010 cycles # 2.855 GHz + 37,650,782,777 instructions # 2.55 insn per cycle + 5.167050022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.244187e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.258766e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.258766e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.587961e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.601478e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.601478e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.273650 sec +TOTAL : 2.167267 sec INFO: No Floating Point Exceptions have been reported - 6,132,413,035 cycles # 2.692 GHz - 13,068,215,974 instructions # 2.13 insn per cycle - 2.278418111 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46983) (512y: 0) (512z: 0) + 6,123,933,660 cycles # 2.822 GHz + 13,061,783,520 instructions # 2.13 insn per cycle + 2.171395105 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.165285e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.187084e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.187084e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.164851e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.185111e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.185111e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.798975 sec +TOTAL : 1.795482 sec INFO: No Floating Point Exceptions have been reported - 5,073,754,093 cycles # 2.815 GHz - 11,460,503,333 instructions # 2.26 insn per cycle - 1.803277093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40514) (512y: 285) (512z: 0) + 5,057,846,668 cycles # 2.812 GHz + 11,453,287,308 instructions # 2.26 insn per cycle + 1.799543537 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40490) 
(512y: 285) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.668145e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.682901e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.682901e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.447733e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.461062e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.461062e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.148661 sec +TOTAL : 2.208265 sec INFO: No Floating Point Exceptions have been reported - 3,962,496,219 cycles # 1.845 GHz - 5,934,435,276 instructions # 1.50 insn per cycle - 2.153008467 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2456) (512y: 337) (512z:39348) + 3,952,574,407 cycles # 1.787 GHz + 5,928,010,897 instructions # 1.50 insn 
per cycle + 2.212410955 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2432) (512y: 337) (512z:39348) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 1c810d5448..7583c01cf4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:44:10 +DATE: 2024-08-08_20:11:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.265225e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.289962e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.291972e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.275171e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299147e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.301063e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532495 sec +TOTAL : 0.533669 sec INFO: No Floating Point Exceptions have been reported - 2,271,399,447 cycles # 2.937 GHz - 3,489,556,693 instructions # 1.54 insn per cycle - 0.830540125 seconds time elapsed + 2,269,961,618 cycles # 2.940 GHz + 3,538,568,106 instructions # 1.56 insn per cycle + 0.830876846 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
3.744110e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.772071e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.773220e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.755572e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779486e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.299405 sec +TOTAL : 3.298195 sec INFO: No Floating Point Exceptions have been reported - 10,705,227,455 cycles # 3.011 GHz - 24,486,514,480 instructions # 2.29 insn per cycle - 3.611307721 seconds time elapsed + 10,673,699,971 cycles # 3.000 GHz + 24,748,682,176 instructions # 2.32 insn per cycle + 3.615699896 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,26 +97,27 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.357685e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.358166e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.358166e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.321186e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.321644e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.321644e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.644054 sec +TOTAL : 37.957787 sec INFO: No Floating Point Exceptions have been reported - 113,873,744,260 cycles # 3.025 GHz - 144,286,882,817 instructions # 1.27 insn per cycle - 37.648261793 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21037) (avx2: 0) (512y: 0) (512z: 0) + 113,686,913,957 cycles # 2.995 GHz + 144,259,453,305 instructions # 1.27 insn per cycle + 37.961860960 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20934) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140450E-004 -Relative difference = 2.83729918072716e-07 +Avg ME (F77/C++) = 6.6266731198140439E-004 +Relative difference = 2.8372991823632784e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.015988e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.018316e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.018316e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.073725e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.076096e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.076096e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.446710 sec +TOTAL : 5.341043 sec INFO: No Floating Point Exceptions have been reported - 15,292,753,074 cycles # 2.806 GHz - 38,397,818,203 instructions # 2.51 insn per cycle - 5.451032302 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69655) (avx2: 0) (512y: 0) (512z: 0) + 15,271,797,585 cycles # 
2.858 GHz + 38,390,165,623 instructions # 2.51 insn per cycle + 5.345237036 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.782928e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.797665e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.797665e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.624786e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.638797e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.638797e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.116755 sec +TOTAL : 2.157053 sec INFO: No Floating Point Exceptions have been reported - 6,022,769,262 cycles # 2.841 GHz - 12,941,827,772 instructions # 2.15 insn per cycle - 2.120967579 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46109) (512y: 0) (512z: 0) + 6,008,150,983 cycles # 2.781 GHz + 12,934,571,742 instructions # 2.15 insn per cycle + 2.161176604 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46091) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.211510e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.233072e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.233072e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.062477e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.083007e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.083007e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.789778 sec +TOTAL : 1.815728 sec INFO: No Floating Point Exceptions have been reported - 5,100,919,756 cycles # 
2.845 GHz - 11,456,622,218 instructions # 2.25 insn per cycle - 1.793897318 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40158) (512y: 219) (512z: 0) + 5,090,244,384 cycles # 2.798 GHz + 11,449,331,673 instructions # 2.25 insn per cycle + 1.819810741 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40134) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.723861e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.738531e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.738531e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.561516e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.575406e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.575406e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.132674 sec 
+TOTAL : 2.175028 sec INFO: No Floating Point Exceptions have been reported - 3,961,317,157 cycles # 1.854 GHz - 5,896,891,551 instructions # 1.49 insn per cycle - 2.137219432 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38926) + 3,947,332,966 cycles # 1.812 GHz + 5,889,708,142 instructions # 1.49 insn per cycle + 2.179231650 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1947) (512y: 259) (512z:38926) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index ee69d40ca4..52d8759019 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:23:18 +DATE: 2024-08-08_19:56:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.969401e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.014832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.020101e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.984596e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.027561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.032406e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.489746 sec +TOTAL : 0.485881 sec INFO: No Floating Point Exceptions have been reported - 2,016,931,047 cycles # 2.848 GHz - 3,025,159,890 instructions # 1.50 insn per cycle - 0.927349827 seconds time elapsed + 2,058,871,536 cycles # 2.917 GHz + 3,048,657,677 instructions # 1.48 insn per cycle + 0.765585250 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.205747e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.264876e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.267666e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.127584e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.186636e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.189605e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.784442 sec +TOTAL : 1.790632 sec INFO: No Floating Point Exceptions have been reported - 5,821,380,715 cycles # 2.891 GHz - 11,584,732,165 instructions # 1.99 insn per cycle - 2.072616133 seconds time elapsed + 5,978,175,900 cycles # 2.960 GHz + 12,554,229,706 instructions # 2.10 insn per cycle + 2.078428019 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001998e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.003043e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.003043e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.983107e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984075e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984075e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.202217 sec +TOTAL : 8.275184 sec INFO: No Floating Point Exceptions have been reported - 24,956,731,457 cycles # 3.042 GHz - 79,116,565,186 instructions # 3.17 insn per cycle - 8.208701764 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3586) (avx2: 0) (512y: 0) (512z: 0) + 24,981,677,575 cycles # 3.018 GHz + 79,112,697,083 instructions # 3.17 insn per cycle + 8.279194518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.262600e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.275603e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.275603e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.049042e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.062007e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.062007e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.267084 sec +TOTAL : 2.331496 sec INFO: No Floating Point Exceptions have been reported - 6,521,430,213 cycles # 2.873 GHz - 20,277,954,673 instructions # 3.11 insn per cycle - 2.273392083 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13797) (avx2: 0) (512y: 0) (512z: 0) + 6,513,667,582 cycles # 2.790 GHz + 20,270,685,743 instructions # 3.11 insn per cycle + 2.335321002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.663561e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.670648e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.670648e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.631322e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.638001e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.638001e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.994752 sec +TOTAL : 1.010094 sec INFO: No Floating Point Exceptions have been reported - 2,837,720,578 cycles # 2.844 GHz - 7,073,170,279 instructions # 2.49 insn per cycle - 1.000889784 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12076) (512y: 0) (512z: 0) + 2,858,902,160 cycles # 2.822 GHz + 7,066,281,657 instructions # 2.47 insn per cycle + 1.013626411 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12058) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.876549e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885485e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885485e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.855078e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.863833e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863833e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.882850 sec +TOTAL : 0.888854 sec INFO: No Floating Point Exceptions have been reported - 2,531,723,545 cycles # 2.859 GHz - 6,411,241,908 instructions # 2.53 insn per cycle - 0.889144965 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11050) (512y: 43) (512z: 0) + 2,514,609,187 cycles # 2.820 GHz + 6,403,227,199 instructions # 2.55 insn per cycle + 0.892442076 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11026) (512y: 
43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.415709e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.420844e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.420844e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.472481e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.477974e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.477974e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.172695 sec +TOTAL : 1.118887 sec INFO: No Floating Point Exceptions have been reported - 2,078,810,102 cycles # 1.776 GHz - 3,311,309,421 instructions # 1.59 insn per cycle - 1.179156364 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 46) (512z: 9609) + 2,071,045,676 cycles # 1.846 GHz + 3,304,181,825 instructions # 1.60 insn per cycle 
+ 1.122589043 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2591) (512y: 46) (512z: 9609) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 73ac8ddea9..d4f5540c08 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:52:52 +DATE: 2024-08-08_20:20:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.333953e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.949066e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.949066e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.362722e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.966550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.966550e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.475381 sec +TOTAL : 0.475517 sec INFO: No Floating Point Exceptions have been reported - 2,022,586,615 cycles # 2.949 GHz - 3,025,069,636 instructions # 1.50 insn per cycle - 0.744809477 seconds time elapsed + 2,001,123,741 cycles # 2.916 GHz + 3,014,989,818 instructions # 1.51 insn per cycle + 0.744972192 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.011106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.163446e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.163446e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.951093e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.086269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.086269e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.955963 sec +TOTAL : 1.963357 sec INFO: No Floating Point Exceptions have been reported - 6,597,079,956 cycles # 3.008 GHz - 13,224,291,569 instructions # 2.00 insn per cycle - 2.249954519 seconds time elapsed + 6,464,131,212 cycles # 2.938 GHz + 13,280,566,465 instructions # 2.05 insn per cycle + 2.255825453 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.979285e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.980287e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.980287e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.961986e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.962995e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962995e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.298271 sec +TOTAL : 8.366737 sec INFO: No Floating Point Exceptions have been reported - 24,988,096,548 cycles # 3.010 GHz - 79,123,467,090 instructions # 3.17 insn per cycle - 8.302811323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3586) (avx2: 0) (512y: 0) (512z: 0) + 25,004,224,949 cycles # 2.987 GHz + 79,113,889,000 instructions # 3.16 insn per cycle + 8.370993372 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.249110e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.263380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.263380e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.168882e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.181926e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.181926e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.273331 sec +TOTAL : 2.295100 sec INFO: No Floating Point Exceptions have been reported - 6,535,487,857 cycles # 2.871 GHz - 20,286,980,927 instructions # 3.10 insn per cycle - 2.277574276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13797) (avx2: 0) (512y: 0) (512z: 0) + 6,522,736,001 cycles # 2.838 GHz + 20,279,496,113 instructions # 3.11 insn per cycle + 2.299251518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.665095e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.672204e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.672204e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.604472e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.610985e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.610985e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.995964 sec +TOTAL : 1.029832 sec INFO: No Floating Point Exceptions have been reported - 2,845,901,940 cycles # 2.847 GHz - 7,083,238,590 instructions # 2.49 insn per cycle - 1.000282605 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12076) (512y: 0) (512z: 0) + 2,869,187,737 cycles # 2.777 GHz + 7,075,475,577 instructions # 2.47 insn per cycle + 1.033942723 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12058) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.875871e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.884797e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.884797e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.863942e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.872787e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.872787e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.884953 sec +TOTAL : 0.887626 sec INFO: No Floating Point Exceptions have been reported - 2,544,773,506 cycles # 2.864 GHz - 6,420,481,607 instructions # 2.52 insn per cycle - 0.889219241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11050) (512y: 43) (512z: 0) + 2,527,038,904 cycles # 2.836 GHz + 6,413,204,152 instructions # 2.54 insn per cycle + 0.891739175 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11026) (512y: 
43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.484572e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.489964e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.489964e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.473762e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.479361e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.479361e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.115836 sec +TOTAL : 1.120677 sec INFO: No Floating Point Exceptions have been reported - 2,087,800,742 cycles # 1.865 GHz - 3,321,180,595 instructions # 1.59 insn per cycle - 1.120177136 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 46) (512z: 9609) + 2,080,597,436 cycles # 1.851 GHz + 3,313,716,206 instructions # 1.59 insn per cycle 
+ 1.124889543 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2591) (512y: 46) (512z: 9609) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 2632c41b0b..2bbd6d0428 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_21:02:29 +DATE: 2024-08-08_20:30:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.991378e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.036667e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.041414e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.027396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.072992e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.077839e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 -TOTAL : 0.468693 sec +TOTAL : 0.472420 sec INFO: No Floating Point Exceptions have been reported - 2,029,615,278 cycles # 2.948 GHz - 3,030,247,378 instructions # 1.49 insn per cycle - 0.745163582 seconds time elapsed + 2,017,335,926 cycles # 2.929 GHz + 2,996,516,741 instructions # 1.49 insn per cycle + 0.747617629 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 8.178274e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.240581e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.243327e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.176066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.236543e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.239377e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.866265 sec +TOTAL : 1.869944 sec INFO: No Floating Point Exceptions have been reported - 6,299,743,710 cycles # 3.010 GHz - 12,600,740,032 instructions # 2.00 insn per cycle - 2.149831052 seconds time elapsed + 6,204,679,090 cycles # 2.959 GHz + 13,136,993,437 instructions # 2.12 insn per cycle + 2.155017166 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.998329e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.999303e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.999303e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.981113e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.982134e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982134e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.216848 sec +TOTAL : 8.283937 sec INFO: No Floating Point Exceptions have been reported - 24,974,321,873 cycles # 3.039 GHz - 79,117,922,163 instructions # 3.17 insn per cycle - 8.220818471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3586) (avx2: 0) (512y: 0) (512z: 0) + 24,969,353,482 cycles # 3.013 GHz + 79,108,034,680 instructions # 3.17 insn per cycle + 8.287825380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.280961e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.294635e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.294635e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.181056e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.194443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.194443e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.261486 sec +TOTAL : 2.289520 sec INFO: No Floating Point Exceptions have been reported - 6,522,836,555 cycles # 2.880 GHz - 20,276,184,021 instructions # 3.11 insn per cycle - 2.265504910 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13797) (avx2: 0) (512y: 0) (512z: 0) + 6,518,141,305 cycles # 2.843 GHz + 20,270,157,027 instructions # 3.11 insn per cycle + 2.293380252 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.668062e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.674946e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.674946e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.629677e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.636717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.636717e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.992017 sec +TOTAL : 1.012223 sec INFO: No Floating Point Exceptions have been reported - 2,836,186,752 cycles # 2.849 GHz - 7,070,350,824 instructions # 2.49 insn per cycle - 0.995979730 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12076) (512y: 0) (512z: 0) + 2,864,292,228 cycles # 2.821 GHz + 7,063,008,029 instructions # 2.47 insn per cycle + 1.016182729 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12058) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.868046e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.876797e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.876797e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.830887e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.839546e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.839546e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.886960 sec +TOTAL : 0.901658 sec INFO: No Floating Point Exceptions have been reported - 2,536,605,252 cycles # 2.849 GHz - 6,407,504,392 instructions # 2.53 insn per cycle - 0.890901975 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11050) (512y: 43) (512z: 0) + 2,522,018,356 cycles # 2.787 GHz + 6,399,988,861 instructions # 2.54 insn per cycle + 0.905644388 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11026) (512y: 
43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.493995e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499521e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499521e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.485210e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.490986e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.490986e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.107008 sec +TOTAL : 1.110909 sec INFO: No Floating Point Exceptions have been reported - 2,080,229,154 cycles # 1.874 GHz - 3,307,176,552 instructions # 1.59 insn per cycle - 1.110975088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 46) (512z: 9609) + 2,072,711,689 cycles # 1.860 GHz + 3,301,709,135 instructions # 1.59 insn per cycle 
+ 1.114884740 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2591) (512y: 46) (512z: 9609) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index a8c80db365..687ea21e82 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:59:44 +DATE: 2024-08-08_20:27:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.978745e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.024403e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.029368e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.974387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.019107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.024136e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.471366 sec +TOTAL : 0.465731 sec INFO: No Floating Point Exceptions have been reported - 2,050,817,847 cycles # 2.957 GHz - 3,042,700,404 instructions # 1.48 insn per cycle - 0.752287748 seconds time elapsed + 1,986,250,676 cycles # 2.933 GHz + 2,951,574,048 instructions # 1.49 insn per cycle + 0.733704221 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 8.124223e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.185934e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.188655e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.127905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.186845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.189533e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.817666 sec +TOTAL : 1.821509 sec INFO: No Floating Point Exceptions have been reported - 6,209,571,440 cycles # 3.020 GHz - 12,616,264,854 instructions # 2.03 insn per cycle - 2.112773344 seconds time elapsed + 6,099,068,812 cycles # 2.975 GHz + 13,255,673,376 instructions # 2.17 insn per cycle + 2.106639688 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001801e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002828e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002828e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.982878e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.983848e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.983848e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.201054 sec +TOTAL : 8.276232 sec INFO: No Floating Point Exceptions have been reported - 24,965,655,332 cycles # 3.043 GHz - 79,115,538,738 instructions # 3.17 insn per cycle - 8.205177482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3586) (avx2: 0) (512y: 0) (512z: 0) + 24,992,064,451 cycles # 3.019 GHz + 79,108,890,354 instructions # 3.17 insn per cycle + 8.280274971 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.122075e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.140371e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.140371e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.180915e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.194829e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.194829e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.312173 sec +TOTAL : 2.288781 sec INFO: No Floating Point Exceptions have been reported - 6,529,449,643 cycles # 2.822 GHz - 20,279,014,819 instructions # 3.11 insn per cycle - 2.317005613 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13797) (avx2: 0) (512y: 0) (512z: 0) + 6,519,434,997 cycles # 2.844 GHz + 20,271,064,648 instructions # 3.11 insn per cycle + 2.292801258 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671295e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.678689e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.678689e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.639199e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.645912e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.645912e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.989320 sec +TOTAL : 1.005317 sec INFO: No Floating Point Exceptions have been reported - 2,833,882,819 cycles # 2.855 GHz - 7,072,820,246 instructions # 2.50 insn per cycle - 0.993299246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12076) (512y: 0) (512z: 0) + 2,861,574,039 cycles # 2.837 GHz + 7,065,482,922 instructions # 2.47 insn per cycle + 1.009367222 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12058) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.845765e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.854604e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.854604e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.841221e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.849583e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849583e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.896647 sec +TOTAL : 0.895518 sec INFO: No Floating Point Exceptions have been reported - 2,532,364,843 cycles # 2.814 GHz - 6,410,761,496 instructions # 2.53 insn per cycle - 0.900735690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11050) (512y: 43) (512z: 0) + 2,517,844,676 cycles # 2.802 GHz + 6,403,839,691 instructions # 2.54 insn per cycle + 0.899537508 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11026) (512y: 
43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.479281e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.485002e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.485002e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.455203e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.460404e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.460404e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.116768 sec +TOTAL : 1.132212 sec INFO: No Floating Point Exceptions have been reported - 2,077,875,816 cycles # 1.855 GHz - 3,310,773,706 instructions # 1.59 insn per cycle - 1.120857530 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 46) (512z: 9609) + 2,067,552,649 cycles # 1.821 GHz + 3,303,460,015 instructions # 1.60 insn per cycle 
+ 1.136266053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2591) (512y: 46) (512z: 9609) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index df70aedb33..5238dd29f1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:57:05 +DATE: 2024-08-08_20:24:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,15 +50,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.434560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.027161e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.032207e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.461156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.032316e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.037418e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.472410 sec +TOTAL : 0.471716 sec INFO: No Floating Point Exceptions have been reported - 2,024,542,487 cycles # 2.941 GHz - 2,972,484,593 instructions # 1.47 insn per cycle - 0.746922396 seconds time elapsed + 2,015,572,444 cycles # 2.959 GHz + 3,048,101,818 instructions # 1.51 insn per cycle + 0.739787706 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -70,18 +70,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.188309e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.266321e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.269256e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.217590e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.274346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.276990e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.895330 sec +TOTAL : 1.888870 sec INFO: No Floating Point Exceptions have been reported - 6,131,858,588 cycles # 2.889 GHz - 13,047,146,236 instructions # 2.13 insn per cycle - 2.180256479 seconds time elapsed + 6,296,963,935 cycles # 2.979 GHz + 13,479,190,689 instructions # 2.14 insn per cycle + 2.172551421 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -98,20 +100,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.010951e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.011958e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.011958e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.967176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.968130e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.968130e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.164059 sec +TOTAL : 8.342097 sec INFO: No Floating Point Exceptions have been reported - 24,967,412,253 cycles # 3.057 GHz - 79,119,765,586 instructions # 3.17 insn per cycle - 8.168055309 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3586) (avx2: 0) (512y: 0) (512z: 0) + 24,950,965,102 cycles # 2.990 GHz + 79,109,236,780 instructions # 3.17 insn per cycle + 8.346055445 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3573) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -126,20 +129,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.311869e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.325581e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.325581e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.089881e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.103174e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103174e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.250985 sec +TOTAL : 2.317816 sec INFO: No Floating Point Exceptions have been reported - 6,523,933,882 cycles # 2.894 GHz - 20,278,721,313 instructions # 3.11 insn per cycle - 2.255081666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13797) (avx2: 0) (512y: 0) (512z: 0) + 6,512,194,963 cycles # 2.805 GHz + 20,270,944,427 instructions # 3.11 insn per cycle + 2.322212487 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13785) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -154,20 +158,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.665544e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.672629e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.672629e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538805e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.544913e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.544913e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.992754 sec +TOTAL : 1.070841 sec INFO: No Floating Point Exceptions have been reported - 2,835,266,942 cycles # 2.846 GHz - 7,072,901,733 instructions # 2.49 insn per cycle - 0.996781737 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12076) (512y: 0) (512z: 0) + 2,864,836,878 cycles # 2.667 GHz + 7,066,173,206 instructions # 2.47 insn per cycle + 1.075040197 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12058) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -182,20 +187,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.852270e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.861343e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.861343e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.841038e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.849527e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849527e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.893815 sec +TOTAL : 0.895722 sec INFO: No Floating Point Exceptions have been reported - 2,533,769,116 cycles # 2.823 GHz - 6,410,639,055 instructions # 2.53 insn per cycle - 0.898045028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11050) (512y: 43) (512z: 0) + 2,515,535,185 cycles # 2.798 GHz + 6,403,562,449 instructions # 2.55 insn per cycle + 0.899557326 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11026) (512y: 
43) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -210,20 +216,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.489228e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.494808e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.494808e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.475627e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481124e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.481124e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.109565 sec +TOTAL : 1.116628 sec INFO: No Floating Point Exceptions have been reported - 2,077,805,915 cycles # 1.867 GHz - 3,310,655,645 instructions # 1.59 insn per cycle - 1.113619813 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 46) (512z: 9609) + 2,068,334,570 cycles # 1.847 GHz + 3,303,479,670 instructions # 1.60 insn per cycle 
+ 1.120666931 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2591) (512y: 46) (512z: 9609) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index d204cfd6c4..498b2cd37c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:23:43 +DATE: 2024-08-08_19:56:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.984175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.025153e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.029869e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.966632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.010698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.016169e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.487139 sec +TOTAL : 0.489605 sec INFO: No Floating Point Exceptions have been reported - 2,063,500,473 cycles # 2.945 GHz - 3,107,238,252 instructions # 1.51 insn per cycle - 0.908009925 seconds time elapsed + 2,010,594,089 cycles # 2.844 GHz + 3,012,973,454 instructions # 1.50 insn per cycle + 0.767009476 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
8.200654e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.259252e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.261790e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.185325e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.243689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.246525e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.784294 sec +TOTAL : 1.784742 sec INFO: No Floating Point Exceptions have been reported - 6,058,623,406 cycles # 3.012 GHz - 12,200,781,654 instructions # 2.01 insn per cycle - 2.070363274 seconds time elapsed + 6,010,360,971 cycles # 2.981 GHz + 12,082,269,886 instructions # 2.01 insn per cycle + 2.072759359 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.933157e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934098e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934098e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.982152e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.983118e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.983118e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.493120 sec +TOTAL : 8.279488 sec INFO: No Floating Point Exceptions have been reported - 24,883,798,967 cycles # 2.929 GHz - 78,851,332,915 instructions # 3.17 insn per cycle - 8.499380928 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3106) (avx2: 0) (512y: 0) (512z: 0) + 24,906,847,273 cycles # 3.008 GHz + 78,843,477,297 instructions # 3.17 insn per cycle + 8.283438125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.076928e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.090345e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.090345e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.430488e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.444488e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.444488e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.325308 sec +TOTAL : 2.211830 sec INFO: No Floating Point Exceptions have been reported - 6,469,756,685 cycles # 2.778 GHz - 20,237,393,653 instructions # 3.13 insn per cycle - 2.332670420 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13509) (avx2: 0) (512y: 0) (512z: 0) + 6,461,373,436 cycles # 2.917 GHz + 20,229,460,939 instructions # 3.13 insn per cycle + 2.215383125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13497) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.522202e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.528155e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.528155e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.546141e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.552346e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.552346e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.086389 sec +TOTAL : 1.065436 sec INFO: No Floating Point Exceptions have been reported - 2,987,340,396 cycles # 2.741 GHz - 7,214,189,758 instructions # 2.41 insn per cycle - 1.094660294 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12458) (512y: 0) (512z: 0) + 2,970,223,700 cycles # 2.780 GHz + 7,206,483,333 instructions # 2.43 insn per cycle + 1.069132793 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12440) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.732912e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.740937e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.740937e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.798890e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.807066e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.807066e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.954309 sec +TOTAL : 0.916539 sec INFO: No Floating Point Exceptions have been reported - 2,617,388,524 cycles # 2.732 GHz - 6,551,995,701 instructions # 2.50 insn per cycle - 0.961130408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11478) (512y: 26) (512z: 0) + 2,599,305,235 cycles # 2.826 GHz + 6,544,414,590 instructions # 2.52 insn per cycle + 0.920171410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11454) (512y: 
26) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.345824e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.350418e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.350418e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.428262e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.433365e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.433365e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.226707 sec +TOTAL : 1.153100 sec INFO: No Floating Point Exceptions have been reported - 2,155,235,101 cycles # 1.751 GHz - 3,469,153,346 instructions # 1.61 insn per cycle - 1.233547264 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3051) (512y: 25) (512z: 9681) + 2,140,036,710 cycles # 1.851 GHz + 3,461,118,107 instructions # 1.62 insn per cycle 
+ 1.156674320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3027) (512y: 25) (512z: 9681) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 4308c5ba18..dc9ca7a530 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:45:14 +DATE: 2024-08-08_20:12:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.048358e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.095472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.100450e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.067673e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.110658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.115133e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.488485 sec +TOTAL : 0.487879 sec INFO: No Floating Point Exceptions have been reported - 2,077,018,410 cycles # 2.955 GHz - 3,109,798,046 instructions # 1.50 insn per cycle - 0.764182373 seconds time elapsed + 2,053,159,539 cycles # 2.919 GHz + 3,075,135,999 instructions # 1.50 insn per cycle + 0.764389501 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
8.654005e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.729790e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.733030e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.681005e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.744501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.747278e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.728242 sec +TOTAL : 1.731074 sec INFO: No Floating Point Exceptions have been reported - 5,873,252,538 cycles # 2.998 GHz - 11,848,783,670 instructions # 2.02 insn per cycle - 2.017968081 seconds time elapsed + 5,778,197,761 cycles # 2.951 GHz + 12,437,674,784 instructions # 2.15 insn per cycle + 2.017655879 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,26 +97,27 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.634141e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.634934e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.634934e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.722501e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.723307e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.723307e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 29.116821 sec +TOTAL : 28.664558 sec INFO: No Floating Point Exceptions have been reported - 88,296,563,936 cycles # 3.033 GHz - 135,724,094,332 instructions # 1.54 insn per cycle - 29.120991306 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15654) (avx2: 0) (512y: 0) (512z: 0) + 85,759,268,786 cycles # 2.992 GHz + 135,287,125,941 instructions # 1.58 insn per cycle + 28.668460894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15198) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275351083142087E-004 -Relative difference = 1.6343060926412837e-08 +Avg ME (F77/C++) = 6.6275351218394313E-004 +Relative difference = 1.8383823081355348e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.056966e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.069695e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.069695e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.988288e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.001222e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.001222e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.332176 sec +TOTAL : 2.351494 sec INFO: No Floating Point Exceptions have been reported - 6,779,570,560 cycles # 2.903 GHz - 19,363,467,868 instructions # 2.86 insn per cycle - 2.336362830 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69602) (avx2: 0) (512y: 0) (512z: 0) + 6,754,834,567 cycles # 2.869 
GHz + 19,356,472,261 instructions # 2.87 insn per cycle + 2.355469886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69590) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.493626e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499294e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499294e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.466081e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.471571e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.471571e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.106963 sec +TOTAL : 1.123603 sec INFO: No Floating Point Exceptions have been reported - 3,165,795,966 cycles # 2.853 GHz - 6,799,095,089 instructions # 2.15 insn per cycle - 1.111060477 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:49016) (512y: 0) (512z: 0) + 3,163,501,117 cycles # 2.807 GHz + 6,791,828,071 instructions # 2.15 insn per cycle + 1.127610138 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48998) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.793077e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801261e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.801261e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.760032e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.767850e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.767850e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.922630 sec +TOTAL : 0.936650 sec INFO: No Floating Point Exceptions have been reported - 2,642,708,250 cycles # 2.854 GHz - 
5,977,492,021 instructions # 2.26 insn per cycle - 0.926708959 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42613) (512y: 11) (512z: 0) + 2,623,882,438 cycles # 2.794 GHz + 5,969,895,302 instructions # 2.28 insn per cycle + 0.940643059 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42589) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.420261e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.425283e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.425283e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.479077e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.484827e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.484827e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.163603 sec +TOTAL : 1.113882 
sec INFO: No Floating Point Exceptions have been reported - 2,081,277,351 cycles # 1.784 GHz - 3,501,680,018 instructions # 1.68 insn per cycle - 1.167704285 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5210) (512y: 3) (512z:44834) + 2,068,747,571 cycles # 1.851 GHz + 3,493,400,176 instructions # 1.69 insn per cycle + 1.117954016 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5186) (512y: 3) (512z:44834) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index bdfa627e42..df0f71d174 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:46:02 +DATE: 2024-08-08_20:13:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.099021e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.152119e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.157226e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.128808e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.173626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.178585e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.484270 sec +TOTAL : 0.487050 sec INFO: No Floating Point Exceptions have been reported - 2,083,532,002 cycles # 2.960 GHz - 3,101,650,257 instructions # 1.49 insn per cycle - 0.760655028 seconds time elapsed + 2,067,516,202 cycles # 2.920 GHz + 3,084,461,624 instructions # 1.49 insn per cycle + 0.767079444 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.768383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.846177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.849534e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.729947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.794330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.797099e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.709256 sec +TOTAL : 1.715330 sec INFO: No Floating Point Exceptions have been reported - 5,845,346,990 cycles # 3.011 GHz - 12,438,318,638 instructions # 2.13 insn per cycle - 1.998774694 seconds time elapsed + 5,790,416,249 cycles # 2.963 GHz + 12,405,778,334 instructions # 2.14 insn per cycle + 2.012725573 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,26 +97,27 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.712703e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.713504e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.713504e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.739276e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.740108e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.740108e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.715532 sec +TOTAL : 28.579010 sec INFO: No Floating Point Exceptions have been reported - 86,154,710,827 cycles # 3.006 GHz - 135,583,041,000 instructions # 1.57 insn per cycle - 28.719661195 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15696) (avx2: 0) (512y: 0) (512z: 0) + 85,869,035,147 cycles # 3.005 GHz + 135,713,098,525 instructions # 1.58 insn per cycle + 28.582934987 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15490) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275346699767868E-004 -Relative difference = 4.979577076821206e-08 +Avg ME (F77/C++) = 6.6275349723624727E-004 +Relative difference = 4.170106635889315e-09 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.004558e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.017373e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.017373e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.656997e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.668108e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.668108e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.349549 sec +TOTAL : 2.468183 sec INFO: No Floating Point Exceptions have been reported - 6,843,302,569 cycles # 2.909 GHz - 19,413,787,446 instructions # 2.84 insn per cycle - 2.353799328 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69633) (avx2: 0) (512y: 0) (512z: 0) + 6,838,146,467 cycles # 2.767 GHz 
+ 19,407,163,330 instructions # 2.84 insn per cycle + 2.472172726 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69621) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.499299e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.504925e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.504925e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.494743e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.500456e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.500456e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.101876 sec +TOTAL : 1.101868 sec INFO: No Floating Point Exceptions have been reported - 3,116,540,240 cycles # 2.820 GHz - 6,722,785,902 instructions # 2.16 insn per cycle - 1.105922589 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:47703) (512y: 0) (512z: 0) + 3,102,166,074 cycles # 2.807 GHz + 6,715,779,639 instructions # 2.16 insn per cycle + 1.105919768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47685) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.797776e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.805926e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.805926e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.757205e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.764907e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.764907e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.920074 sec +TOTAL : 0.937783 sec INFO: No Floating Point Exceptions have been reported - 2,632,426,467 cycles # 2.851 GHz - 
5,975,861,662 instructions # 2.27 insn per cycle - 0.924164164 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41894) (512y: 13) (512z: 0) + 2,624,045,983 cycles # 2.788 GHz + 5,968,641,196 instructions # 2.27 insn per cycle + 0.941620580 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41870) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.503457e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.509298e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.509298e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.475717e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481089e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.481089e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.098991 sec +TOTAL : 1.116160 
sec INFO: No Floating Point Exceptions have been reported - 2,074,398,072 cycles # 1.881 GHz - 3,493,830,287 instructions # 1.68 insn per cycle - 1.103135228 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4174) (512y: 4) (512z:44485) + 2,072,491,943 cycles # 1.851 GHz + 3,486,963,775 instructions # 1.68 insn per cycle + 1.120311238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4150) (512y: 4) (512z:44485) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 37791bd44c..f906b484d1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:24:08 +DATE: 2024-08-08_19:57:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.466437e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.490895e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.492995e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.456351e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.482973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.485002e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528024 sec +TOTAL : 0.527206 sec INFO: No Floating Point Exceptions have been reported - 2,167,248,013 cycles # 2.842 GHz - 3,408,911,296 instructions # 1.57 insn per cycle - 1.059002803 seconds time elapsed + 2,263,706,765 cycles # 2.945 GHz + 3,529,595,149 instructions # 1.56 insn per cycle + 0.828954022 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168579e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169818e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.128784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.158212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.159533e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.031191 sec +TOTAL : 3.057239 sec INFO: No Floating Point Exceptions have been reported - 9,553,855,695 cycles # 2.907 GHz - 20,366,578,683 instructions # 2.13 insn per cycle - 3.341274220 seconds time elapsed + 9,783,417,122 cycles # 2.925 GHz + 13,211,264,053 instructions # 1.35 insn per cycle + 3.405402734 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.843122e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.844017e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.844017e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.903780e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904695e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904695e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.908083 sec +TOTAL : 8.621676 sec INFO: No Floating Point Exceptions have been reported - 25,993,175,975 cycles # 2.917 GHz - 79,438,748,307 instructions # 3.06 insn per cycle - 8.914670521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4789) (avx2: 0) (512y: 0) (512z: 0) + 25,964,721,381 cycles # 3.010 GHz + 79,427,591,787 instructions # 3.06 insn per cycle + 8.626023484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4776) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.477942e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.481153e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.481153e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.603827e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.607327e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.607327e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.725780 sec +TOTAL : 4.557849 sec INFO: No Floating Point Exceptions have been reported - 12,846,841,244 cycles # 2.717 GHz - 38,833,705,303 instructions # 3.02 insn per cycle - 4.732362911 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13184) (avx2: 0) (512y: 0) (512z: 0) + 12,814,190,735 cycles # 2.810 GHz + 38,825,158,190 instructions # 3.03 insn per cycle + 4.561789335 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13172) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.084067e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.100651e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.100651e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.224833e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.241665e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.241665e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.038475 sec +TOTAL : 2.000761 sec INFO: No Floating Point Exceptions have been reported - 5,582,099,770 cycles # 2.734 GHz - 13,625,182,795 instructions # 2.44 insn per cycle - 2.046074509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11433) (512y: 0) (512z: 0) + 5,588,116,210 cycles # 2.789 GHz + 13,618,090,861 instructions # 2.44 insn per cycle + 2.004606328 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11415) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.157339e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.179034e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.179034e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.076409e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.097653e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.097653e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.800984 sec +TOTAL : 1.813694 sec INFO: No Floating Point Exceptions have been reported - 4,872,138,012 cycles # 2.701 GHz - 12,304,394,482 instructions # 2.53 insn per cycle - 1.808284460 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10343) (512y: 79) (512z: 0) + 4,900,228,417 cycles # 2.697 GHz + 12,298,153,916 instructions # 2.51 insn per cycle + 1.817598978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10319) (512y: 
79) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.942926e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.955101e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.955101e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.275673e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.288563e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.288563e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.372771 sec +TOTAL : 2.261390 sec INFO: No Floating Point Exceptions have been reported - 4,182,201,238 cycles # 1.761 GHz - 6,398,643,943 instructions # 1.53 insn per cycle - 2.378723749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1981) (512y: 93) (512z: 9359) + 4,176,196,803 cycles # 1.844 GHz + 6,391,790,037 instructions # 1.53 insn per cycle 
+ 2.265279894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1957) (512y: 93) (512z: 9359) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index ef7bf47569..965f537970 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-06-28_20:24:42 +DATE: 2024-08-08_19:57:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.471540e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495762e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497819e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.478905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507625e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530504 sec +TOTAL : 0.523820 sec INFO: No Floating Point Exceptions have been reported - 2,208,819,005 cycles # 2.919 GHz - 3,459,394,113 instructions # 1.57 insn per cycle - 0.990710147 seconds time elapsed + 2,217,657,303 cycles # 2.936 GHz + 3,422,937,672 instructions # 1.54 insn per cycle + 0.814906080 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
4.128166e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.156082e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.157268e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.142523e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.173230e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.021368 sec +TOTAL : 3.034284 sec INFO: No Floating Point Exceptions have been reported - 9,847,196,311 cycles # 3.007 GHz - 20,896,495,636 instructions # 2.12 insn per cycle - 3.330773882 seconds time elapsed + 9,867,106,252 cycles # 2.970 GHz + 19,377,940,372 instructions # 1.96 insn per cycle + 3.381320729 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.904896e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905828e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905828e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.898812e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.899704e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899704e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.618971 sec +TOTAL : 8.643841 sec INFO: No Floating Point Exceptions have been reported - 26,017,566,799 cycles # 3.018 GHz - 79,463,058,396 instructions # 3.05 insn per cycle - 8.625612789 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4445) (avx2: 0) (512y: 0) (512z: 0) + 26,013,311,554 cycles # 3.009 GHz + 79,457,517,298 instructions # 3.05 insn per cycle + 8.647992970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4432) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.466998e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.470121e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.470121e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.611561e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.614888e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.614888e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.740884 sec +TOTAL : 4.547888 sec INFO: No Floating Point Exceptions have been reported - 12,818,499,465 cycles # 2.706 GHz - 38,787,565,320 instructions # 3.03 insn per cycle - 4.748209539 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12946) (avx2: 0) (512y: 0) (512z: 0) + 12,837,773,076 cycles # 2.821 GHz + 38,782,082,140 instructions # 3.02 insn per cycle + 4.551612597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12934) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.954476e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.970429e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.970429e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.352238e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.369622e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.369622e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.071432 sec +TOTAL : 1.970486 sec INFO: No Floating Point Exceptions have been reported - 5,606,087,504 cycles # 2.701 GHz - 13,739,354,094 instructions # 2.45 insn per cycle - 2.079581267 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11516) (512y: 0) (512z: 0) + 5,585,325,981 cycles # 2.830 GHz + 13,732,293,539 instructions # 2.46 insn per cycle + 1.974370273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11498) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.052340e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.072919e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.072919e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.400061e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.421825e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.421825e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.820988 sec +TOTAL : 1.751328 sec INFO: No Floating Point Exceptions have been reported - 4,962,658,899 cycles # 2.719 GHz - 12,428,563,963 instructions # 2.50 insn per cycle - 1.828126764 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10334) (512y: 239) (512z: 0) + 4,952,817,402 cycles # 2.822 GHz + 12,422,492,733 instructions # 2.51 insn per cycle + 1.755554143 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10310) (512y: 
239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.784333e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.797005e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.797005e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.219259e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.232248e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.232248e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.427429 sec +TOTAL : 2.278823 sec INFO: No Floating Point Exceptions have been reported - 4,189,591,829 cycles # 1.722 GHz - 6,503,095,126 instructions # 1.55 insn per cycle - 2.438290310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1804) (512y: 191) (512z: 9368) + 4,182,901,935 cycles # 1.833 GHz + 6,495,418,480 instructions # 1.55 insn per 
cycle + 2.282695112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1780) (512y: 191) (512z: 9368) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 881adcbbb3..69ee294d0a 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:26:32 +DATE: 2024-08-08_19:59:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.064817e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.065212e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.065427e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.065566e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066073e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.429437 sec +TOTAL : 2.441334 sec INFO: No Floating Point Exceptions have been reported - 8,296,328,617 cycles # 3.016 GHz - 17,339,087,580 instructions # 2.09 insn per cycle - 2.815159518 seconds time elapsed + 8,270,107,004 cycles # 2.987 GHz + 17,474,421,900 instructions # 2.11 insn per cycle + 2.824451613 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 9.222997e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.224884e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.225172e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.242290e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.244758e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.245006e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.002446 sec +TOTAL : 4.011109 sec INFO: No Floating Point Exceptions have been reported - 13,014,244,174 cycles # 3.008 GHz - 30,562,795,013 instructions # 2.35 insn per cycle - 4.387764634 seconds time elapsed + 12,991,708,385 cycles # 2.995 GHz + 30,957,069,887 instructions # 2.38 insn per cycle + 4.393935391 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.942411e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.942623e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.942623e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.391032e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.391286e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.391286e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.659863 sec +TOTAL : 6.292298 sec INFO: No Floating Point Exceptions have been reported - 18,952,576,848 cycles # 2.848 GHz - 53,913,437,622 instructions # 2.84 insn per cycle - 6.666065835 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32438) (avx2: 0) (512y: 0) (512z: 0) + 18,909,993,943 cycles # 3.004 GHz + 53,904,007,557 instructions # 2.85 insn per cycle + 6.296177339 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.646263e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.646555e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.646555e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.592148e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.592238e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.592238e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.258166 sec +TOTAL : 3.319128 sec INFO: No Floating Point Exceptions have been reported - 9,815,076,926 cycles # 3.034 GHz - 27,160,713,930 instructions # 2.77 insn per cycle - 3.265054274 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96511) (avx2: 0) (512y: 0) (512z: 0) + 9,961,985,828 cycles # 2.999 GHz + 27,151,879,178 instructions # 2.73 insn per cycle + 3.323113942 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.513827e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.514244e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.514244e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.420642e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.421042e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.421042e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.513778 sec +TOTAL : 1.544804 sec INFO: No Floating Point Exceptions have been reported - 4,295,484,479 cycles # 2.840 GHz - 9,598,007,558 instructions # 2.23 insn per cycle - 1.520024006 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84989) (512y: 0) (512z: 0) + 4,330,644,690 cycles # 2.797 GHz + 9,589,874,862 instructions # 2.21 insn per cycle + 1.548809848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84971) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.021787e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.022321e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.022321e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.965040e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.965659e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.965659e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.323232 sec +TOTAL : 1.333170 sec INFO: No Floating Point Exceptions have been reported - 3,750,332,594 cycles # 2.837 GHz - 8,522,076,942 instructions # 2.27 insn per cycle - 1.329385160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80643) (512y: 89) (512z: 0) + 3,730,547,974 cycles # 2.792 GHz + 8,513,850,652 instructions # 2.28 insn per cycle + 1.336769828 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80619) 
(512y: 89) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.657338e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.657868e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.657868e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.618586e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.619123e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.619123e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.450577 sec +TOTAL : 1.462675 sec INFO: No Floating Point Exceptions have been reported - 2,701,855,108 cycles # 1.860 GHz - 4,288,620,965 instructions # 1.59 insn per cycle - 1.454564839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2876) (512y: 103) (512z:79119) + 2,695,334,241 cycles # 1.839 GHz + 4,280,276,658 instructions # 1.59 
insn per cycle + 1.466339679 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2852) (512y: 103) (512z:79119) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 1c1653b55c..e1baa342f4 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:53:17 +DATE: 2024-08-08_20:20:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.071657e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.072586e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072586e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.064923e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065845e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065845e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.389807 sec +TOTAL : 2.386081 sec INFO: No Floating Point Exceptions have been reported - 8,166,677,370 cycles # 3.014 GHz - 17,110,502,600 instructions # 2.10 insn per cycle - 2.768730521 seconds time elapsed + 8,068,364,516 cycles # 2.980 GHz + 18,499,320,498 instructions # 2.29 insn per cycle + 2.766222042 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.239728e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.274140e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.274140e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.216459e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.248148e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.248148e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.989987 sec +TOTAL : 3.985205 sec INFO: No Floating Point Exceptions have been reported - 12,984,716,355 cycles # 3.010 GHz - 30,026,547,711 instructions # 2.31 insn per cycle - 4.369885993 seconds time elapsed + 12,879,401,549 cycles # 2.982 GHz + 28,276,545,925 instructions # 2.20 insn per cycle + 4.377652629 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.075924e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.076143e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.076143e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.400950e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.401188e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.401188e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.545095 sec +TOTAL : 6.287943 sec INFO: No Floating Point Exceptions have been reported - 18,993,218,804 cycles # 2.901 GHz - 53,909,508,309 instructions # 2.84 insn per cycle - 6.549040643 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32438) (avx2: 0) (512y: 0) (512z: 0) + 18,917,133,316 cycles # 3.007 GHz + 53,900,822,413 instructions # 2.85 insn per cycle + 6.291810989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32425) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.634824e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.634915e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.634915e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.588454e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.588541e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.588541e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.239041 sec +TOTAL : 3.326167 sec INFO: No Floating Point Exceptions have been reported - 9,844,142,959 cycles # 3.038 GHz - 27,159,907,056 instructions # 2.76 insn per cycle - 3.243158367 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96511) (avx2: 0) (512y: 0) (512z: 0) + 9,981,726,497 cycles # 2.998 GHz + 27,151,411,979 instructions # 2.72 insn per cycle + 3.330120405 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96499) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.522862e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.523280e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.523280e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.463521e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.463922e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.463922e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.509038 sec +TOTAL : 1.526941 sec INFO: No Floating Point Exceptions have been reported - 4,285,489,952 cycles # 2.833 GHz - 9,598,013,083 instructions # 2.24 insn per cycle - 1.513170310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84989) (512y: 0) (512z: 0) + 4,301,902,923 cycles # 2.811 GHz + 9,590,835,987 instructions # 2.23 insn per cycle + 1.530966019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84971) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.062060e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.062617e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.062617e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.003469e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.004081e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.004081e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.305600 sec +TOTAL : 1.322167 sec INFO: No Floating Point Exceptions have been reported - 3,731,786,493 cycles # 2.852 GHz - 8,522,102,351 instructions # 2.28 insn per cycle - 1.309705466 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80643) (512y: 89) (512z: 0) + 3,729,352,964 cycles # 2.814 GHz + 8,515,368,436 instructions # 2.28 insn per cycle + 1.326036505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80619) 
(512y: 89) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.604372e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604990e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604990e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.565416e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.566063e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.566063e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.470888 sec +TOTAL : 1.483865 sec INFO: No Floating Point Exceptions have been reported - 2,708,113,908 cycles # 1.837 GHz - 4,288,944,422 instructions # 1.58 insn per cycle - 1.474912429 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2876) (512y: 103) (512z:79119) + 2,695,897,083 cycles # 1.813 GHz + 4,281,463,157 instructions # 1.59 
insn per cycle + 1.487939257 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2852) (512y: 103) (512z:79119) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 657598560e..618d256396 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:27:25 +DATE: 2024-08-08_20:00:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.060868e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.061271e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.061430e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.058227e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.058613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.058749e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.430160 sec +TOTAL : 2.446864 sec INFO: No Floating Point Exceptions have been reported - 8,275,897,121 cycles # 3.006 GHz - 18,470,926,867 instructions # 2.23 insn per cycle - 2.811196040 seconds time elapsed + 8,303,278,275 cycles # 3.000 GHz + 18,645,596,525 instructions # 2.25 insn per cycle + 2.826809106 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 9.235367e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.237271e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.237519e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.233958e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.236030e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.236303e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.996473 sec +TOTAL : 4.007873 sec INFO: No Floating Point Exceptions have been reported - 13,050,093,352 cycles # 3.020 GHz - 31,028,947,131 instructions # 2.38 insn per cycle - 4.377792426 seconds time elapsed + 12,910,025,920 cycles # 2.976 GHz + 30,025,616,729 instructions # 2.33 insn per cycle + 4.392667162 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.480305e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.480556e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.480556e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.875983e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.876201e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.876201e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.241708 sec +TOTAL : 6.703762 sec INFO: No Floating Point Exceptions have been reported - 18,821,174,280 cycles # 3.014 GHz - 53,941,028,180 instructions # 2.87 insn per cycle - 6.245850686 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32036) (avx2: 0) (512y: 0) (512z: 0) + 18,880,147,773 cycles # 2.815 GHz + 53,931,698,860 instructions # 2.86 insn per cycle + 6.707560831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32023) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.609430e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.609514e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.609514e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.621951e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.622050e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622050e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.285465 sec +TOTAL : 3.258110 sec INFO: No Floating Point Exceptions have been reported - 9,981,253,621 cycles # 3.035 GHz - 27,137,548,724 instructions # 2.72 insn per cycle - 3.289453787 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96387) (avx2: 0) (512y: 0) (512z: 0) + 9,846,977,880 cycles # 3.019 GHz + 27,128,812,737 instructions # 2.76 insn per cycle + 3.262446550 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.556423e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.556875e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.556875e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.448151e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.448577e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.448577e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.489857 sec +TOTAL : 1.533013 sec INFO: No Floating Point Exceptions have been reported - 4,244,835,717 cycles # 2.843 GHz - 9,591,311,003 instructions # 2.26 insn per cycle - 1.494070789 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84996) (512y: 0) (512z: 0) + 4,309,903,765 cycles # 2.805 GHz + 9,584,249,957 instructions # 2.22 insn per cycle + 1.537048676 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84978) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.060696e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.061222e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.061222e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.985777e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.986306e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986306e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.305460 sec +TOTAL : 1.327029 sec INFO: No Floating Point Exceptions have been reported - 3,733,750,272 cycles # 2.853 GHz - 8,514,034,293 instructions # 2.28 insn per cycle - 1.309432612 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80666) (512y: 239) (512z: 0) + 3,743,360,462 cycles # 2.814 GHz + 8,506,735,194 instructions # 2.27 insn per cycle + 1.330926412 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80642) 
(512y: 239) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.642371e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.642906e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.642906e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.581234e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.581805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.581805e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.454904 sec +TOTAL : 1.477295 sec INFO: No Floating Point Exceptions have been reported - 2,700,741,707 cycles # 1.852 GHz - 4,287,448,214 instructions # 1.59 insn per cycle - 1.458920670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2713) (512y: 185) (512z:79103) + 2,699,035,749 cycles # 1.824 GHz + 4,280,090,319 instructions # 
1.59 insn per cycle + 1.480967463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2689) (512y: 185) (512z:79103) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 1ec1f218cc..b4fc180cc1 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:28:17 +DATE: 2024-08-08_20:02:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.293372e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.294106e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.294365e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.298150e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.298890e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.299224e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.734398 sec +TOTAL : 1.751662 sec INFO: No Floating Point Exceptions have been reported - 5,998,471,870 cycles # 3.013 GHz - 12,941,477,945 instructions # 2.16 insn per cycle - 2.047491345 seconds time elapsed + 5,936,795,436 cycles # 2.952 GHz + 12,013,270,651 instructions # 2.02 insn per cycle + 2.067502844 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 2.166641e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.167275e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.167430e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.155180e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155800e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155887e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.041320 sec +TOTAL : 2.055202 sec INFO: No Floating Point Exceptions have been reported - 6,882,695,469 cycles # 2.993 GHz - 15,335,035,349 instructions # 2.23 insn per cycle - 2.356075989 seconds time elapsed + 6,915,039,139 cycles # 2.986 GHz + 14,633,712,669 instructions # 2.12 insn per cycle + 2.372054868 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.760616e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.760920e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.760920e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.752648e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.752917e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.752917e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.032900 sec +TOTAL : 6.035465 sec INFO: No Floating Point Exceptions have been reported - 18,362,673,487 cycles # 3.042 GHz - 53,916,281,327 instructions # 2.94 insn per cycle - 6.036929247 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20155) (avx2: 0) (512y: 0) (512z: 0) + 18,171,458,820 cycles # 3.009 GHz + 53,912,614,149 instructions # 2.97 insn per cycle + 6.039280806 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.307614e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.308000e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308000e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.468219e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.468626e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.468626e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.602584 sec +TOTAL : 1.524160 sec INFO: No Floating Point Exceptions have been reported - 4,635,363,924 cycles # 2.886 GHz - 13,813,886,206 instructions # 2.98 insn per cycle - 1.606784601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97034) (avx2: 0) (512y: 0) (512z: 0) + 4,594,690,732 cycles # 3.008 GHz + 13,806,361,271 instructions # 3.00 insn per cycle + 1.528090955 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.014969e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.016646e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.016646e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.022651e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.024377e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.024377e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.757122 sec +TOTAL : 0.754295 sec INFO: No Floating Point Exceptions have been reported - 2,168,880,892 cycles # 2.852 GHz - 4,843,034,742 instructions # 2.23 insn per cycle - 0.761157343 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85515) (512y: 0) (512z: 0) + 2,137,910,409 cycles # 2.822 GHz + 4,835,783,841 instructions # 2.26 insn per cycle + 0.758250875 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.123850e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.126172e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.126172e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.922130e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.924339e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.924339e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.654383 sec +TOTAL : 0.668838 sec INFO: No Floating Point Exceptions have been reported - 1,871,543,363 cycles # 2.845 GHz - 4,297,216,044 instructions # 2.30 insn per cycle - 0.658314207 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81214) (512y: 44) (512z: 0) + 1,877,666,899 cycles # 2.793 GHz + 4,290,021,460 instructions # 2.28 insn per cycle + 0.672738963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81190) 
(512y: 44) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.212794e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.214860e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.214860e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.249467e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.251538e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.251538e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.737304 sec +TOTAL : 0.730439 sec INFO: No Floating Point Exceptions have been reported - 1,364,348,136 cycles # 1.843 GHz - 2,168,826,929 instructions # 1.59 insn per cycle - 0.741185900 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3493) (512y: 47) (512z:79334) + 1,353,764,576 cycles # 1.845 GHz + 2,161,505,151 instructions # 1.60 
insn per cycle + 0.734391470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3469) (512y: 47) (512z:79334) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 19c3a156fb..2973bcd9f9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:54:09 +DATE: 2024-08-08_20:21:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.304682e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.306407e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.306407e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.303570e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.305124e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.305124e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 -TOTAL : 1.682174 sec +TOTAL : 1.683838 sec INFO: No Floating Point Exceptions have been reported - 5,852,031,284 cycles # 3.020 GHz - 12,722,485,881 instructions # 2.17 insn per cycle - 1.994323218 seconds time elapsed + 5,740,674,837 cycles # 2.959 GHz + 12,183,340,475 instructions # 2.12 insn per cycle + 1.996602458 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.119434e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.130595e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.130595e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.128072e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.139024e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139024e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 -TOTAL : 2.023198 sec +TOTAL : 2.036931 sec INFO: No Floating Point Exceptions have been reported - 6,866,209,578 cycles # 3.012 GHz - 14,247,651,349 instructions # 2.08 insn per cycle - 2.335730547 seconds time elapsed + 6,817,978,012 cycles # 2.973 GHz + 15,086,512,597 instructions # 2.21 insn per cycle + 2.349967443 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.811807e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.812078e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.812078e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.676163e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.676428e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.676428e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.996292 sec +TOTAL : 6.087276 sec INFO: No Floating Point Exceptions have been reported - 18,222,904,500 cycles # 3.038 GHz - 53,917,190,034 instructions # 2.96 insn per cycle - 6.000226691 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20155) (avx2: 0) (512y: 0) (512z: 0) + 18,179,826,190 cycles # 2.985 GHz + 53,910,247,266 instructions # 2.97 insn per cycle + 6.091212728 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.510518e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510973e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.510973e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.464690e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.465102e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465102e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.509640 sec +TOTAL : 1.525630 sec INFO: No Floating Point Exceptions have been reported - 4,625,465,113 cycles # 3.057 GHz - 13,814,506,056 instructions # 2.99 insn per cycle - 1.513610135 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97034) (avx2: 0) (512y: 0) (512z: 0) + 4,590,585,740 cycles # 3.003 GHz + 13,807,319,566 instructions # 3.01 insn per cycle + 1.529386769 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.936529e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.938180e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.938180e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.967974e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.969738e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.969738e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.766429 sec +TOTAL : 0.760104 sec INFO: No Floating Point Exceptions have been reported - 2,173,283,102 cycles # 2.824 GHz - 4,844,170,694 instructions # 2.23 insn per cycle - 0.770393352 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85515) (512y: 0) (512z: 0) + 2,138,286,262 cycles # 2.802 GHz + 4,837,282,487 instructions # 2.26 insn per cycle + 0.763970265 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.970392e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.972585e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.972585e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.967332e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.969544e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.969544e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.667630 sec +TOTAL : 0.664857 sec INFO: No Floating Point Exceptions have been reported - 1,889,307,000 cycles # 2.817 GHz - 4,298,323,850 instructions # 2.28 insn per cycle - 0.671620510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81214) (512y: 44) (512z: 0) + 1,870,319,411 cycles # 2.799 GHz + 4,291,006,476 instructions # 2.29 insn per cycle + 0.668734591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81190) 
(512y: 44) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.302679e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.304907e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.304907e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.241242e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.243401e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.243401e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.728718 sec +TOTAL : 0.731334 sec INFO: No Floating Point Exceptions have been reported - 1,362,980,476 cycles # 1.862 GHz - 2,169,928,812 instructions # 1.59 insn per cycle - 0.732739722 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3493) (512y: 47) (512z:79334) + 1,357,966,074 cycles # 1.849 GHz + 2,162,865,434 instructions # 1.59 
insn per cycle + 0.735255583 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3469) (512y: 47) (512z:79334) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index ca329577bc..cfac3f719e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:28:55 +DATE: 2024-08-08_20:02:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.286645e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.287498e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.287809e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.289590e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.290901e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.291153e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.736228 sec +TOTAL : 1.752222 sec INFO: No Floating Point Exceptions have been reported - 6,001,807,556 cycles # 3.011 GHz - 11,967,206,185 instructions # 1.99 insn per cycle - 2.050151331 seconds time elapsed + 6,011,479,262 cycles # 2.988 GHz + 11,822,786,435 instructions # 1.97 insn per cycle + 2.068235514 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 2.143364e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.143971e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.144046e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.118039e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.118627e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.118705e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.058089 sec +TOTAL : 2.087174 sec INFO: No Floating Point Exceptions have been reported - 7,010,517,846 cycles # 3.026 GHz - 15,562,554,743 instructions # 2.22 insn per cycle - 2.372415375 seconds time elapsed + 7,020,765,748 cycles # 2.977 GHz + 15,445,166,662 instructions # 2.20 insn per cycle + 2.414506634 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.771160e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.771418e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.771418e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.753426e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.753693e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.753693e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.026290 sec +TOTAL : 6.033711 sec INFO: No Floating Point Exceptions have been reported - 18,379,600,352 cycles # 3.049 GHz - 53,901,368,839 instructions # 2.93 insn per cycle - 6.030226018 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20155) (avx2: 0) (512y: 0) (512z: 0) + 18,095,249,979 cycles # 2.998 GHz + 53,894,797,748 instructions # 2.98 insn per cycle + 6.037598164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.509821e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510256e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.510256e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.476703e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477111e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.477111e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.509733 sec +TOTAL : 1.520725 sec INFO: No Floating Point Exceptions have been reported - 4,595,160,001 cycles # 3.037 GHz - 13,806,656,105 instructions # 3.00 insn per cycle - 1.513907845 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96669) (avx2: 0) (512y: 0) (512z: 0) + 4,582,334,771 cycles # 3.007 GHz + 13,799,523,503 instructions # 3.01 insn per cycle + 1.524516230 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96657) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.059545e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.061253e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.061253e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.920572e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.922271e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.922271e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.752009 sec +TOTAL : 0.764164 sec INFO: No Floating Point Exceptions have been reported - 2,140,130,377 cycles # 2.833 GHz - 4,847,155,413 instructions # 2.26 insn per cycle - 0.756177798 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85905) (512y: 0) (512z: 0) + 2,153,123,984 cycles # 2.806 GHz + 4,840,163,805 instructions # 2.25 insn per cycle + 0.767980176 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.995651e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.997972e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.997972e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.954158e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.956209e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.956209e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.665225 sec +TOTAL : 0.665841 sec INFO: No Floating Point Exceptions have been reported - 1,902,280,846 cycles # 2.845 GHz - 4,300,952,608 instructions # 2.26 insn per cycle - 0.669196467 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81754) (512y: 24) (512z: 0) + 1,891,343,146 cycles # 2.826 GHz + 4,293,658,543 instructions # 2.27 insn per cycle + 0.669786991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81730) 
(512y: 24) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.312392e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.314887e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.314887e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.171151e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.173263e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.173263e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.728015 sec +TOTAL : 0.740474 sec INFO: No Floating Point Exceptions have been reported - 1,370,437,119 cycles # 1.874 GHz - 2,175,702,006 instructions # 1.59 insn per cycle - 0.731950621 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4106) (512y: 32) (512z:79555) + 1,358,622,018 cycles # 1.827 GHz + 2,168,397,288 instructions # 1.60 
insn per cycle + 0.744609857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4082) (512y: 32) (512z:79555) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 851a39f552..30f43d1d54 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:29:34 +DATE: 2024-08-08_20:03:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.691814e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.692323e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.692498e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.679462e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.679946e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.680144e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.173801 sec +TOTAL : 2.195383 sec INFO: No Floating Point Exceptions have been reported - 7,533,775,938 cycles # 3.018 GHz - 16,627,293,057 instructions # 2.21 insn per cycle - 2.552141855 seconds time elapsed + 7,438,879,261 cycles # 2.953 GHz + 16,326,818,821 instructions # 2.19 insn per cycle + 2.577345674 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 1.107805e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108082e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108125e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108202e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108498e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108526e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.402776 sec +TOTAL : 3.425728 sec INFO: No Floating Point Exceptions have been reported - 11,035,265,539 cycles # 2.949 GHz - 25,209,132,705 instructions # 2.28 insn per cycle - 3.797925240 seconds time elapsed + 11,268,079,350 cycles # 3.003 GHz + 26,526,619,371 instructions # 2.35 insn per cycle + 3.809078207 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.863438e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.863651e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.863651e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.696399e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.696636e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.696636e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.720162 sec +TOTAL : 6.867954 sec INFO: No Floating Point Exceptions have been reported - 19,171,030,465 cycles # 2.852 GHz - 54,139,658,780 instructions # 2.82 insn per cycle - 6.724069680 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32014) (avx2: 0) (512y: 0) (512z: 0) + 19,211,187,371 cycles # 2.796 GHz + 54,136,498,902 instructions # 2.82 insn per cycle + 6.871886606 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32001) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.605795e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.605881e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.605881e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.599481e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.599571e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.599571e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.294366 sec +TOTAL : 3.303538 sec INFO: No Floating Point Exceptions have been reported - 9,394,914,106 cycles # 2.849 GHz - 26,193,668,002 instructions # 2.79 insn per cycle - 3.298386549 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96060) (avx2: 0) (512y: 0) (512z: 0) + 9,333,906,777 cycles # 2.823 GHz + 26,186,384,503 instructions # 2.81 insn per cycle + 3.307369825 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96048) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.715414e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.715875e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.715875e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.642781e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.643249e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.643249e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.427067 sec +TOTAL : 1.453378 sec INFO: No Floating Point Exceptions have been reported - 4,066,233,290 cycles # 2.843 GHz - 9,255,492,192 instructions # 2.28 insn per cycle - 1.431038183 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84396) (512y: 0) (512z: 0) + 4,089,405,470 cycles # 2.807 GHz + 9,248,953,263 instructions # 2.26 insn per cycle + 1.457404649 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84378) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.244987e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.245575e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.245575e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.265363e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.265985e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.265985e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.249256 sec +TOTAL : 1.239836 sec INFO: No Floating Point Exceptions have been reported - 3,550,344,735 cycles # 2.835 GHz - 8,190,000,965 instructions # 2.31 insn per cycle - 1.253239411 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80027) (512y: 79) (512z: 0) + 3,507,542,927 cycles # 2.822 GHz + 8,182,646,854 instructions # 2.33 insn per cycle + 1.243760162 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80003) 
(512y: 79) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.733572e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.734190e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.734190e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.616663e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617178e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617178e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.419842 sec +TOTAL : 1.461797 sec INFO: No Floating Point Exceptions have been reported - 2,629,781,082 cycles # 1.848 GHz - 4,178,874,518 instructions # 1.59 insn per cycle - 1.423895743 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2613) (512y: 93) (512z:78909) + 2,666,404,255 cycles # 1.820 GHz + 4,171,669,153 instructions # 1.56 
insn per cycle + 1.465874998 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2589) (512y: 93) (512z:78909) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 01dde53669..7b7d65b2d2 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2024-06-28_20:30:25 +DATE: 2024-08-08_20:04:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.677056e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.677559e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.677740e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.675385e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.675879e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.676008e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.176443 sec +TOTAL : 2.190431 sec INFO: No Floating Point Exceptions have been reported - 7,487,683,814 cycles # 2.999 GHz - 16,520,220,397 instructions # 2.21 insn per cycle - 2.555460487 seconds time elapsed + 7,517,385,120 cycles # 2.989 GHz + 15,570,357,961 instructions # 2.07 insn per cycle + 2.571136488 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 1.110219e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110497e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110535e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.109468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.109746e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109778e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.410801 sec +TOTAL : 3.419906 sec INFO: No Floating Point Exceptions have been reported - 11,197,151,914 cycles # 2.999 GHz - 25,393,591,354 instructions # 2.27 insn per cycle - 3.792719865 seconds time elapsed + 11,221,781,722 cycles # 2.994 GHz + 24,236,211,120 instructions # 2.16 insn per cycle + 3.803243859 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.710962e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.711166e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.711166e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.902849e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.903107e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.903107e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.858638 sec +TOTAL : 6.673081 sec INFO: No Floating Point Exceptions have been reported - 19,224,649,899 cycles # 2.802 GHz - 54,164,766,817 instructions # 2.82 insn per cycle - 6.862669409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32216) (avx2: 0) (512y: 0) (512z: 0) + 19,149,429,604 cycles # 2.868 GHz + 54,156,492,076 instructions # 2.83 insn per cycle + 6.676939828 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32203) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.613373e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.613459e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.613459e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.571432e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.571520e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.571520e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.277132 sec +TOTAL : 3.363251 sec INFO: No Floating Point Exceptions have been reported - 9,318,655,439 cycles # 2.841 GHz - 26,093,521,108 instructions # 2.80 insn per cycle - 3.281273932 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95949) (avx2: 0) (512y: 0) (512z: 0) + 9,398,223,848 cycles # 2.792 GHz + 26,086,325,143 instructions # 2.78 insn per cycle + 3.367354553 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95937) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.612730e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.613184e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.613184e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.625397e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.625854e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625854e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.466168 sec +TOTAL : 1.456994 sec INFO: No Floating Point Exceptions have been reported - 4,047,902,147 cycles # 2.756 GHz - 9,220,799,448 instructions # 2.28 insn per cycle - 1.470272690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83870) (512y: 0) (512z: 0) + 4,075,335,135 cycles # 2.792 GHz + 9,212,511,442 instructions # 2.26 insn per cycle + 1.460794766 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83852) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.298679e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.299308e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.299308e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.243367e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.244047e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.244047e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.234280 sec +TOTAL : 1.245554 sec INFO: No Floating Point Exceptions have been reported - 3,526,966,960 cycles # 2.851 GHz - 8,174,270,516 instructions # 2.32 insn per cycle - 1.238238467 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79433) (512y: 229) (512z: 0) + 3,512,150,002 cycles # 2.812 GHz + 8,166,955,109 instructions # 2.33 insn per cycle + 1.249525029 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79409) 
(512y: 229) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.733439e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.733991e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.733991e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.660094e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.660683e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660683e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.419623 sec +TOTAL : 1.444444 sec INFO: No Floating Point Exceptions have been reported - 2,627,097,541 cycles # 1.846 GHz - 4,173,730,813 instructions # 1.59 insn per cycle - 1.423630153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1877) (512y: 175) (512z:78883) + 2,623,623,826 cycles # 1.812 GHz + 4,166,476,704 instructions # 1.59 
insn per cycle + 1.448438406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1853) (512y: 175) (512z:78883) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 6c1b4ea2ca..dc70f1aa96 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:25:15 +DATE: 2024-08-08_19:58:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.790022e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.356109e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.700619e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.793830e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.275665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.618309e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.453336 sec +TOTAL : 0.446682 sec INFO: No Floating Point Exceptions have been reported - 1,989,983,180 cycles # 2.947 GHz - 2,785,844,872 instructions # 1.40 insn per cycle - 0.788940065 seconds time elapsed + 1,973,218,669 cycles # 2.938 GHz + 2,737,206,349 instructions # 1.39 insn per cycle + 0.728215190 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = 
( 3.587316e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.180244e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.537381e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.512201e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.215148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.564113e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.524586 sec +TOTAL : 0.528377 sec INFO: No Floating Point Exceptions have been reported - 2,302,234,774 cycles # 2.983 GHz - 3,293,556,186 instructions # 1.43 insn per cycle - 0.829451283 seconds time elapsed + 2,273,295,859 cycles # 2.942 GHz + 3,270,605,178 instructions # 1.44 insn per cycle + 0.829840488 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.097573e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.120521e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120521e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.087919e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111512e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.514920 sec +TOTAL : 1.525836 sec INFO: No Floating Point Exceptions have been reported - 4,626,793,284 cycles # 3.048 GHz - 13,198,012,215 instructions # 2.85 insn per cycle - 1.522558693 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 720) (avx2: 0) (512y: 0) (512z: 0) + 4,620,985,524 cycles # 3.021 GHz + 13,191,789,695 instructions # 2.85 insn per cycle + 1.530034055 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950778e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.023577e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.023577e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.913767e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.985469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.985469e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.860945 sec +TOTAL : 0.875694 sec INFO: No Floating Point Exceptions have been reported - 2,649,662,880 cycles # 3.065 GHz - 7,563,277,875 instructions # 2.85 insn per cycle - 0.868046964 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3116) (avx2: 0) (512y: 0) (512z: 0) + 2,645,390,944 cycles # 3.009 GHz + 7,556,169,585 instructions # 2.86 insn per cycle + 0.879849311 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.288545e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.501162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.501162e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.250464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.457998e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457998e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.521153 sec +TOTAL : 0.522755 sec INFO: No Floating Point Exceptions have been reported - 1,496,320,787 cycles # 2.854 GHz - 3,166,843,366 instructions # 2.12 insn per cycle - 0.528925313 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3002) (512y: 0) (512z: 0) + 1,489,187,494 cycles # 2.830 GHz + 3,159,085,018 instructions # 2.12 insn per cycle + 0.526770948 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2984) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.533993e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785485e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785485e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.609694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.866945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.866945e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.487338 sec +TOTAL : 0.473366 sec INFO: No Floating Point Exceptions have been reported - 1,356,346,660 cycles # 2.768 GHz - 3,021,733,089 instructions # 2.23 insn per cycle - 0.493578513 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2769) (512y: 104) (512z: 0) + 1,347,276,225 cycles # 2.825 GHz + 3,016,026,977 instructions # 2.24 insn per cycle + 0.477451794 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2745) (512y: 
104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.478813e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.598693e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.598693e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.459896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.579821e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.579821e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.686044 sec +TOTAL : 0.687520 sec INFO: No Floating Point Exceptions have been reported - 1,333,386,243 cycles # 1.934 GHz - 1,969,640,769 instructions # 1.48 insn per cycle - 0.692703053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1391) (512y: 106) (512z: 2217) + 1,326,541,553 cycles # 1.920 GHz + 1,964,358,241 instructions # 1.48 insn per 
cycle + 0.691777094 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1367) (512y: 106) (512z: 2217) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index cab9bcc977..280fcce352 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:51:54 +DATE: 2024-08-08_20:19:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.479240e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.008789e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.008789e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.684298e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.299204e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.299204e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.475656 sec +TOTAL : 0.471497 sec INFO: No Floating Point Exceptions have been reported - 2,038,893,126 cycles # 2.960 GHz - 2,977,070,828 instructions # 1.46 insn per cycle - 0.745313870 seconds time elapsed + 2,016,663,667 cycles # 2.932 GHz + 2,996,818,007 instructions # 1.49 insn per cycle + 0.744526851 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.327342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.480237e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.480237e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.407307e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.579683e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.579683e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.748104 sec +TOTAL : 0.738495 sec INFO: No Floating Point Exceptions have been reported - 2,948,004,634 cycles # 2.961 GHz - 4,497,963,430 instructions # 1.53 insn per cycle - 1.054638211 seconds time elapsed + 2,913,311,119 cycles # 2.959 GHz + 4,473,148,579 instructions # 1.54 insn per cycle + 1.042109459 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.078327e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.102056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.102056e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.071825e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094847e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.548417 sec +TOTAL : 1.553859 sec INFO: No Floating Point Exceptions have been reported - 4,667,469,364 cycles # 3.007 GHz - 13,202,731,600 instructions # 2.83 insn per cycle - 1.552883532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 720) (avx2: 0) (512y: 0) (512z: 0) + 4,647,790,593 cycles # 2.984 GHz + 13,197,257,990 instructions # 2.84 insn per cycle + 1.558215122 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.932132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.004159e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.004159e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.902347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.973784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.973784e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.875687 sec +TOTAL : 0.886591 sec INFO: No Floating Point Exceptions have been reported - 2,682,056,903 cycles # 3.050 GHz - 7,610,496,007 instructions # 2.84 insn per cycle - 0.880075632 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3116) (avx2: 0) (512y: 0) (512z: 0) + 2,676,044,915 cycles # 3.006 GHz + 7,604,510,010 instructions # 2.84 insn per cycle + 0.890913281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.293837e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.504426e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.504426e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.212543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.422665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.422665e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.526616 sec +TOTAL : 0.536325 sec INFO: No Floating Point Exceptions have been reported - 1,529,323,143 cycles # 2.883 GHz - 3,217,059,809 instructions # 2.10 insn per cycle - 0.531008511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3002) (512y: 0) (512z: 0) + 1,528,484,723 cycles # 2.830 GHz + 3,209,947,960 instructions # 2.10 insn per cycle + 0.540711031 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2984) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.666271e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.933201e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.933201e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.560716e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.811838e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.811838e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.475965 sec +TOTAL : 0.486090 sec INFO: No Floating Point Exceptions have been reported - 1,385,254,715 cycles # 2.890 GHz - 3,070,395,317 instructions # 2.22 insn per cycle - 0.480322475 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2769) (512y: 104) (512z: 0) + 1,376,959,578 cycles # 2.811 GHz + 3,063,340,210 instructions # 2.22 insn per cycle + 0.490411106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2745) (512y: 
104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.294434e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.409827e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.409827e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.438051e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.554379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.554379e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.747092 sec +TOTAL : 0.699323 sec INFO: No Floating Point Exceptions have been reported - 1,380,748,031 cycles # 1.839 GHz - 2,007,713,125 instructions # 1.45 insn per cycle - 0.751567886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1391) (512y: 106) (512z: 2217) + 1,353,225,054 cycles # 1.926 GHz + 1,999,803,163 instructions # 1.48 insn per 
cycle + 0.703554082 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1367) (512y: 106) (512z: 2217) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 1c33195921..0801a72f2e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:25:28 +DATE: 2024-08-08_19:58:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.775583e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.188831e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.517095e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.715940e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.160616e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.486831e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.446354 sec +TOTAL : 0.449924 sec INFO: No Floating Point Exceptions have been reported - 1,978,395,800 cycles # 2.952 GHz - 2,771,408,017 instructions # 1.40 insn per cycle - 0.796607303 seconds time elapsed + 1,942,000,933 cycles # 2.932 GHz + 2,723,193,332 instructions # 1.40 insn per cycle + 0.721112435 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = 
( 3.570046e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.090310e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.431455e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.484674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.054198e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.395966e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.526554 sec +TOTAL : 0.530941 sec INFO: No Floating Point Exceptions have been reported - 2,277,253,734 cycles # 2.972 GHz - 3,270,762,044 instructions # 1.44 insn per cycle - 0.823669445 seconds time elapsed + 2,253,028,696 cycles # 2.947 GHz + 3,232,782,518 instructions # 1.43 insn per cycle + 0.823488099 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.099420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122937e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.122937e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.055734e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078647e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078647e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.512747 sec +TOTAL : 1.572076 sec INFO: No Floating Point Exceptions have been reported - 4,627,982,694 cycles # 3.052 GHz - 13,186,184,949 instructions # 2.85 insn per cycle - 1.519924373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) + 4,625,532,940 cycles # 2.937 GHz + 13,181,547,125 instructions # 2.85 insn per cycle + 1.575799334 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.949013e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.023241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.023241e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.856450e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.926302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926302e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.863276 sec +TOTAL : 0.902285 sec INFO: No Floating Point Exceptions have been reported - 2,643,207,158 cycles # 3.053 GHz - 7,560,625,846 instructions # 2.86 insn per cycle - 0.869204519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3110) (avx2: 0) (512y: 0) (512z: 0) + 2,641,918,143 cycles # 2.918 GHz + 7,554,356,585 instructions # 2.86 insn per cycle + 0.906092774 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.287048e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.500557e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.500557e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.249746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.464508e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.464508e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.520133 sec +TOTAL : 0.523830 sec INFO: No Floating Point Exceptions have been reported - 1,497,606,910 cycles # 2.858 GHz - 3,165,816,703 instructions # 2.11 insn per cycle - 0.527177034 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2987) (512y: 0) (512z: 0) + 1,491,771,401 cycles # 2.831 GHz + 3,160,437,103 instructions # 2.12 insn per cycle + 0.527543251 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2969) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.489743e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.732662e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.732662e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.610049e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.870786e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.870786e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.492703 sec +TOTAL : 0.473152 sec INFO: No Floating Point Exceptions have been reported - 1,355,352,613 cycles # 2.736 GHz - 3,018,112,059 instructions # 2.23 insn per cycle - 0.498338855 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2743) (512y: 104) (512z: 0) + 1,347,000,026 cycles # 2.829 GHz + 3,012,563,261 instructions # 2.24 insn per cycle + 0.476761119 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2719) (512y: 
104) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.482908e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.602468e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.602468e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.451125e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.569830e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.569830e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.684096 sec +TOTAL : 0.689809 sec INFO: No Floating Point Exceptions have been reported - 1,331,295,161 cycles # 1.935 GHz - 1,967,793,261 instructions # 1.48 insn per cycle - 0.690431223 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1368) (512y: 106) (512z: 2217) + 1,325,269,157 cycles # 1.912 GHz + 1,962,212,225 instructions # 1.48 insn per 
cycle + 0.693734086 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1344) (512y: 106) (512z: 2217) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 850aaf835d..776a8e7cf2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:25:41 +DATE: 2024-08-08_19:58:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.293532e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042506e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.137949e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.177753e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.044280e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.137137e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.451562 sec +TOTAL : 0.446256 sec INFO: No Floating Point Exceptions have been reported - 1,965,513,776 cycles # 2.824 GHz - 2,635,583,653 instructions # 1.34 insn per cycle - 0.859142260 seconds time elapsed + 1,967,028,633 cycles # 2.927 GHz + 2,729,560,871 instructions # 1.39 insn per cycle + 0.730482007 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 165 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
7.368956e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537935e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.624387e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.302708e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.525963e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.623999e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.475188 sec +TOTAL : 0.480335 sec INFO: No Floating Point Exceptions have been reported - 2,098,261,664 cycles # 2.944 GHz - 2,954,481,606 instructions # 1.41 insn per cycle - 0.769790018 seconds time elapsed + 2,062,608,643 cycles # 2.922 GHz + 2,954,769,461 instructions # 1.43 insn per cycle + 0.763163038 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.150060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.132642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159370e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.446431 sec +TOTAL : 1.464071 sec INFO: No Floating Point Exceptions have been reported - 4,411,772,032 cycles # 3.045 GHz - 12,958,210,870 instructions # 2.94 insn per cycle - 1.451536636 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 658) (avx2: 0) (512y: 0) (512z: 0) + 4,406,453,406 cycles # 3.003 GHz + 12,951,424,799 instructions # 2.94 insn per cycle + 1.468164938 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.900734e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.084469e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.084469e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.856948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.035260e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.035260e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.586035 sec +TOTAL : 0.590761 sec INFO: No Floating Point Exceptions have been reported - 1,737,693,108 cycles # 2.951 GHz - 4,549,270,467 instructions # 2.62 insn per cycle - 0.593914325 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3638) (avx2: 0) (512y: 0) (512z: 0) + 1,725,972,010 cycles # 2.906 GHz + 4,541,556,745 instructions # 2.63 insn per cycle + 0.594447330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.861820e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.580664e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.580664e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.798317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.520080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.520080e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.300710 sec +TOTAL : 0.300105 sec INFO: No Floating Point Exceptions have been reported - 862,449,479 cycles # 2.844 GHz - 1,924,591,814 instructions # 2.23 insn per cycle - 0.306904344 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3584) (512y: 0) (512z: 0) + 854,524,206 cycles # 2.821 GHz + 1,917,397,512 instructions # 2.24 insn per cycle + 0.303595328 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3566) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.340644e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.181705e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.181705e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.187295e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.004492e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.004492e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.279461 sec +TOTAL : 0.282163 sec INFO: No Floating Point Exceptions have been reported - 808,719,930 cycles # 2.865 GHz - 1,841,626,395 instructions # 2.28 insn per cycle - 0.284314491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3414) (512y: 22) (512z: 0) + 807,334,376 cycles # 2.832 GHz + 1,834,144,656 instructions # 2.27 insn per cycle + 0.285676418 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3390) (512y: 22) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.796043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.289013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.289013e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.697538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.170455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.170455e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.364949 sec +TOTAL : 0.368301 sec INFO: No Floating Point Exceptions have been reported - 736,833,879 cycles # 2.005 GHz - 1,315,783,402 instructions # 1.79 insn per cycle - 0.370048804 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2005) (512y: 32) (512z: 2432) + 729,603,114 cycles # 1.965 GHz + 1,308,166,262 instructions # 1.79 insn per cycle + 
0.371960958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1942) (512y: 26) (512z: 2432) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index de1a622f9c..e112255ddc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:52:07 +DATE: 2024-08-08_20:19:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,15 +53,15 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.522257e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.599137e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.599137e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.675417e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.135496e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.135496e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 -TOTAL : 0.455736 sec +TOTAL : 0.454896 sec INFO: No Floating Point Exceptions have been reported - 1,976,950,200 cycles # 2.968 GHz - 2,931,197,541 instructions # 1.48 insn per cycle - 0.723128470 seconds time elapsed + 1,922,075,239 cycles # 2.886 GHz + 2,812,656,009 instructions # 1.46 insn per cycle + 0.723103268 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,18 +79,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.174973e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.635538e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.635538e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.230387e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.891837e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.891837e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 -TOTAL : 0.623300 sec +TOTAL : 0.622542 sec INFO: No Floating Point Exceptions have been reported - 2,540,926,352 cycles # 2.970 GHz - 3,858,630,672 instructions # 1.52 insn per cycle - 0.912725281 seconds time elapsed + 2,509,793,238 cycles # 2.945 GHz + 3,839,626,015 instructions # 1.53 insn per cycle + 0.910444487 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -108,20 +110,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.151240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.177166e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.177166e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.133555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159187e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.447433 sec +TOTAL : 1.466168 sec INFO: No Floating Point Exceptions have been reported - 4,427,281,485 cycles # 3.051 GHz - 12,962,707,314 instructions # 2.93 insn per cycle - 1.451753088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 658) (avx2: 0) (512y: 0) (512z: 0) + 4,419,438,233 cycles # 3.007 GHz + 12,955,838,618 instructions # 2.93 insn per cycle + 1.470344991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -137,20 +140,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.955554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.138018e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.138018e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.929772e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.111984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.111984e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.578329 sec +TOTAL : 0.580373 sec INFO: No Floating Point Exceptions have been reported - 1,758,401,594 cycles # 3.021 GHz - 4,597,076,777 instructions # 2.61 insn per cycle - 0.582909175 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3638) (avx2: 0) (512y: 0) (512z: 0) + 1,747,268,230 cycles # 2.992 GHz + 4,589,745,792 instructions # 2.63 insn per cycle + 0.584483983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -166,20 +170,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.760381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.479793e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.479793e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.766764e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.470194e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.470194e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.308970 sec +TOTAL : 0.305547 sec INFO: No Floating Point Exceptions have been reported - 885,542,338 cycles # 2.833 GHz - 1,961,389,336 instructions # 2.21 insn per cycle - 0.313214968 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3584) (512y: 0) (512z: 0) + 873,235,026 cycles # 2.827 GHz + 1,954,283,245 instructions # 2.24 insn per cycle + 0.309543568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3566) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -195,20 +200,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.249080e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.082136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.082136e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.204649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.052966e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.052966e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.286383 sec +TOTAL : 0.285349 sec INFO: No Floating Point Exceptions have been reported - 829,534,712 cycles # 2.859 GHz - 1,878,022,002 instructions # 2.26 insn per cycle - 0.290621670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3414) (512y: 22) (512z: 0) + 822,856,149 cycles # 2.849 GHz + 1,871,067,127 instructions # 2.27 insn per cycle + 0.289383401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3390) (512y: 22) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -224,20 +230,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.707763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.189867e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.189867e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.709235e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.178014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.178014e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.375256 sec +TOTAL : 0.371559 sec INFO: No Floating Point Exceptions have been reported - 759,283,515 cycles # 2.009 GHz - 1,357,256,338 instructions # 1.79 insn per cycle - 0.379485732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2005) (512y: 32) (512z: 2432) + 748,105,287 cycles # 1.994 GHz + 1,349,627,266 instructions # 1.80 insn per cycle + 
0.375758776 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1942) (512y: 26) (512z: 2432) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index e0566e4426..f4c5647b28 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:25:53 +DATE: 2024-08-08_19:59:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.353171e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.062702e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165312e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.121935e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045477e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150621e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.443834 sec +TOTAL : 0.441822 sec INFO: No Floating Point Exceptions have been reported - 1,941,858,797 cycles # 2.954 GHz - 2,754,746,518 instructions # 1.42 insn per cycle - 0.817724378 seconds time elapsed + 1,919,824,453 cycles # 2.925 GHz + 2,711,548,396 instructions # 1.41 insn per cycle + 0.712257308 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 164 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
7.429407e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.572953e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.663069e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.453927e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.579708e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.670884e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.480050 sec +TOTAL : 0.482328 sec INFO: No Floating Point Exceptions have been reported - 2,073,786,110 cycles # 2.949 GHz - 2,967,574,310 instructions # 1.43 insn per cycle - 0.762050426 seconds time elapsed + 2,075,215,740 cycles # 2.939 GHz + 2,958,576,913 instructions # 1.43 insn per cycle + 0.765173729 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.132529e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158514e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.158514e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.138812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.164706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.164706e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.467260 sec +TOTAL : 1.455800 sec INFO: No Floating Point Exceptions have been reported - 4,412,522,593 cycles # 3.000 GHz - 12,934,109,749 instructions # 2.93 insn per cycle - 1.475427698 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 643) (avx2: 0) (512y: 0) (512z: 0) + 4,403,258,677 cycles # 3.018 GHz + 12,926,930,475 instructions # 2.94 insn per cycle + 1.459744309 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.991463e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.176020e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.176020e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.936303e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.120025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.120025e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.567363 sec +TOTAL : 0.574725 sec INFO: No Floating Point Exceptions have been reported - 1,733,083,388 cycles # 3.035 GHz - 4,543,937,033 instructions # 2.62 insn per cycle - 0.574183678 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3622) (avx2: 0) (512y: 0) (512z: 0) + 1,726,777,095 cycles # 2.987 GHz + 4,536,166,658 instructions # 2.63 insn per cycle + 0.578775017 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.771328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.486383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.486383e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.813817e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.547021e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.547021e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.304220 sec +TOTAL : 0.298922 sec INFO: No Floating Point Exceptions have been reported - 863,108,492 cycles # 2.802 GHz - 1,921,511,708 instructions # 2.23 insn per cycle - 0.311629404 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3554) (512y: 0) (512z: 0) + 857,389,967 cycles # 2.838 GHz + 1,914,305,415 instructions # 2.23 insn per cycle + 0.302780018 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3536) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.362713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.211365e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.211365e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.307694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.166095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.166095e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.277439 sec +TOTAL : 0.276778 sec INFO: No Floating Point Exceptions have been reported - 808,884,759 cycles # 2.877 GHz - 1,837,754,812 instructions # 2.27 insn per cycle - 0.284539698 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3378) (512y: 22) (512z: 0) + 801,815,801 cycles # 2.863 GHz + 1,829,952,798 instructions # 2.28 insn per cycle + 0.280644988 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3354) (512y: 22) 
(512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.756673e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.241916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.241916e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.668444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.134327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.134327e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.366455 sec +TOTAL : 0.370402 sec INFO: No Floating Point Exceptions have been reported - 736,983,260 cycles # 1.990 GHz - 1,313,795,216 instructions # 1.78 insn per cycle - 0.373998194 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1968) (512y: 32) (512z: 2435) + 727,659,849 cycles # 1.947 GHz + 1,306,194,061 instructions # 1.80 insn per cycle + 
0.374419699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1905) (512y: 26) (512z: 2435) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 302ee19ef4..14cf46cbcc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:26:06 +DATE: 2024-08-08_19:59:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.857164e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.383212e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.731282e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.769849e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.334726e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.696577e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.445846 sec +TOTAL : 0.447945 sec INFO: No Floating Point Exceptions have been reported - 1,989,381,537 cycles # 2.950 GHz - 2,820,748,525 instructions # 1.42 insn per cycle - 0.741246343 seconds time elapsed + 1,970,077,649 cycles # 2.938 GHz + 2,764,650,199 instructions # 1.40 insn per cycle + 0.727384144 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
3.606744e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.198388e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.549914e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.502555e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.204679e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.563131e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.526247 sec +TOTAL : 0.530343 sec INFO: No Floating Point Exceptions have been reported - 2,280,337,997 cycles # 2.953 GHz - 3,250,188,330 instructions # 1.43 insn per cycle - 0.829002703 seconds time elapsed + 2,259,914,656 cycles # 2.930 GHz + 3,250,253,432 instructions # 1.44 insn per cycle + 0.828686428 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.088780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111564e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111564e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.069358e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092261e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092261e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.528773 sec +TOTAL : 1.552012 sec INFO: No Floating Point Exceptions have been reported - 4,656,228,550 cycles # 3.043 GHz - 13,184,186,040 instructions # 2.83 insn per cycle - 1.534455484 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 694) (avx2: 0) (512y: 0) (512z: 0) + 4,641,202,069 cycles # 2.985 GHz + 13,179,687,646 instructions # 2.84 insn per cycle + 1.555810770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955974e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.029658e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.029658e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.876933e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.946940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.946940e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.859548 sec +TOTAL : 0.892460 sec INFO: No Floating Point Exceptions have been reported - 2,646,287,948 cycles # 3.067 GHz - 7,482,344,904 instructions # 2.83 insn per cycle - 0.865760719 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3164) (avx2: 0) (512y: 0) (512z: 0) + 2,644,592,448 cycles # 2.953 GHz + 7,475,728,591 instructions # 2.83 insn per cycle + 0.896244087 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.313508e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.535972e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.535972e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.303870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519584e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519584e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.517719 sec +TOTAL : 0.515449 sec INFO: No Floating Point Exceptions have been reported - 1,485,493,063 cycles # 2.852 GHz - 3,134,701,988 instructions # 2.11 insn per cycle - 0.523996490 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3137) (512y: 0) (512z: 0) + 1,473,674,467 cycles # 2.841 GHz + 3,129,036,980 instructions # 2.12 insn per cycle + 0.519216773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3119) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.732479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.004169e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.004169e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.630465e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.893768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.893768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.461514 sec +TOTAL : 0.471176 sec INFO: No Floating Point Exceptions have been reported - 1,325,634,826 cycles # 2.855 GHz - 2,988,988,966 instructions # 2.25 insn per cycle - 0.467757291 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2905) (512y: 110) (512z: 0) + 1,324,066,570 cycles # 2.791 GHz + 2,982,910,932 instructions # 2.25 insn per cycle + 0.474943404 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2881) (512y: 
110) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.417582e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.531347e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.531347e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.354541e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.462714e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.462714e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.702407 sec +TOTAL : 0.717844 sec INFO: No Floating Point Exceptions have been reported - 1,370,431,429 cycles # 1.943 GHz - 1,997,409,518 instructions # 1.46 insn per cycle - 0.709124260 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1737) (512y: 114) (512z: 2251) + 1,364,512,931 cycles # 1.893 GHz + 1,991,624,740 instructions # 1.46 insn per cycle 
+ 0.721728207 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1656) (512y: 108) (512z: 2251) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 25931b257e..5b20c017bf 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-06-28_20:26:19 +DATE: 2024-08-08_19:59:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.833950e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.211642e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.559558e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.764426e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.211229e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.545216e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.449366 sec +TOTAL : 0.450206 sec INFO: No Floating Point Exceptions have been reported - 1,956,372,552 cycles # 2.947 GHz - 2,728,568,804 instructions # 1.39 insn per cycle - 0.863711942 seconds time elapsed + 1,949,946,468 cycles # 2.935 GHz + 2,761,346,859 instructions # 1.42 insn per cycle + 0.722536101 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
3.575384e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.039636e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.376287e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.478869e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.028008e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.358881e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.525393 sec +TOTAL : 0.526742 sec INFO: No Floating Point Exceptions have been reported - 2,278,493,650 cycles # 2.960 GHz - 3,260,222,958 instructions # 1.43 insn per cycle - 0.826868506 seconds time elapsed + 2,265,443,315 cycles # 2.945 GHz + 3,237,723,769 instructions # 1.43 insn per cycle + 0.826628143 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.088519e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111448e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111448e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.082497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.105654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105654e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.526994 sec +TOTAL : 1.532875 sec INFO: No Floating Point Exceptions have been reported - 4,638,191,750 cycles # 3.030 GHz - 13,173,310,720 instructions # 2.84 insn per cycle - 1.535040418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) + 4,647,233,937 cycles # 3.025 GHz + 13,168,093,251 instructions # 2.83 insn per cycle + 1.537009895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.926197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.997457e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.997457e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.916408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.986697e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.986697e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.871595 sec +TOTAL : 0.873749 sec INFO: No Floating Point Exceptions have been reported - 2,645,790,535 cycles # 3.021 GHz - 7,484,268,569 instructions # 2.83 insn per cycle - 0.879191644 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3153) (avx2: 0) (512y: 0) (512z: 0) + 2,638,584,974 cycles # 3.010 GHz + 7,477,829,189 instructions # 2.83 insn per cycle + 0.877352084 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.372139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.594706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.594706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.313421e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533027e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.507773 sec +TOTAL : 0.513511 sec INFO: No Floating Point Exceptions have been reported - 1,475,182,947 cycles # 2.884 GHz - 3,135,352,683 instructions # 2.13 insn per cycle - 0.514367920 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3115) (512y: 0) (512z: 0) + 1,473,425,351 cycles # 2.852 GHz + 3,129,237,400 instructions # 2.12 insn per cycle + 0.517237290 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3097) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.762089e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.038043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.038043e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.703540e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.984962e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.984962e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.457004 sec +TOTAL : 0.461287 sec INFO: No Floating Point Exceptions have been reported - 1,327,429,252 cycles # 2.880 GHz - 2,989,142,789 instructions # 2.25 insn per cycle - 0.463756238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2881) (512y: 110) (512z: 0) + 1,320,825,681 cycles # 2.850 GHz + 2,983,955,617 instructions # 2.26 insn per cycle + 0.465038534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2857) (512y: 
110) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.413330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.527512e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.527512e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.367399e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.477116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477116e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.702513 sec +TOTAL : 0.713600 sec INFO: No Floating Point Exceptions have been reported - 1,371,536,812 cycles # 1.941 GHz - 1,997,365,728 instructions # 1.46 insn per cycle - 0.709597826 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1713) (512y: 114) (512z: 2251) + 1,364,189,990 cycles # 1.903 GHz + 1,991,688,961 instructions # 1.46 insn per cycle 
+ 0.717422383 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 108) (512z: 2251) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index f32a724279..83b828ef2e 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-06-28_21:12:11 +DATE: 2024-08-08_20:39:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.891269e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.097165e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.182586e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.966123e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.101302e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.184882e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.518623 sec +TOTAL : 0.517997 sec INFO: No Floating Point Exceptions have been reported - 2,223,632,508 cycles # 2.966 GHz - 3,217,766,625 instructions # 1.45 insn per cycle - 0.806241893 seconds time elapsed + 2,197,627,386 cycles # 2.931 GHz + 3,156,596,662 instructions # 1.44 insn per cycle + 0.806377685 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.688566e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.727462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.727462e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.676906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.715525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.715525e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.319846 sec +TOTAL : 6.391723 sec INFO: No Floating Point Exceptions have been reported - 19,281,617,903 cycles # 3.049 GHz - 51,950,129,769 instructions # 2.69 insn per cycle - 6.325274001 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) + 19,396,886,248 cycles # 3.031 GHz + 52,050,532,705 instructions # 2.68 insn per cycle + 6.400835825 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.003345e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.138677e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.138677e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.012360e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.148434e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.148434e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.601344 sec +TOTAL : 3.619594 sec INFO: No Floating Point Exceptions have been reported - 10,942,559,787 cycles # 3.035 GHz - 30,789,312,911 instructions # 2.81 insn per cycle - 3.606762368 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2929) (avx2: 0) (512y: 0) (512z: 0) + 11,008,104,240 cycles # 3.034 GHz + 30,899,851,824 instructions # 2.81 insn per cycle + 3.628709587 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.784075e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.119038e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.119038e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.811277e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.159957e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.159957e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.300582 sec +TOTAL : 2.317730 sec INFO: No Floating Point Exceptions have been reported - 6,469,586,871 cycles # 2.807 GHz - 13,668,839,399 instructions # 2.11 insn per cycle - 2.306000495 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2952) (512y: 0) (512z: 0) + 6,603,833,232 cycles # 2.839 GHz + 13,785,660,246 instructions # 2.09 insn per 
cycle + 2.326886320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2934) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.242513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.669269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.669269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.274677e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.701182e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.701182e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.113701 sec +TOTAL : 2.128100 sec INFO: No Floating Point Exceptions have been reported - 5,923,179,599 cycles # 2.796 GHz - 13,009,174,359 instructions # 2.20 insn per cycle - 2.119346982 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2684) 
(512y: 146) (512z: 0) + 6,037,170,556 cycles # 2.826 GHz + 13,124,188,246 instructions # 2.17 insn per cycle + 2.137191260 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2660) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.615364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.807396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.807396e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.546906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.734269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.734269e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.009511 sec +TOTAL : 3.095180 sec INFO: No Floating Point Exceptions have been reported - 5,858,071,251 cycles # 1.944 GHz - 8,592,010,560 instructions # 1.47 insn 
per cycle - 3.014951128 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1518) (512y: 128) (512z: 1942) + 5,952,641,894 cycles # 1.919 GHz + 8,707,382,958 instructions # 1.46 insn per cycle + 3.104614357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1494) (512y: 128) (512z: 1942) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 73fd47669c..6dfb3d97d4 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-06-28_21:12:36 +DATE: 2024-08-08_20:40:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.867031e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100662e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185332e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.936743e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.101495e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185931e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.520675 sec +TOTAL : 0.520732 sec INFO: No Floating Point Exceptions have been reported - 2,224,693,306 cycles # 2.963 GHz - 3,176,876,470 instructions # 1.43 insn per cycle - 0.809131316 seconds time elapsed + 2,199,613,002 cycles # 2.925 GHz + 3,199,605,848 instructions # 1.45 insn per cycle + 0.808356541 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.773905e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.816144e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.816144e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.741086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.782692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782692e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.020472 sec +TOTAL : 6.159994 sec INFO: No Floating Point Exceptions have been reported - 18,445,965,429 cycles # 3.062 GHz - 50,082,657,641 instructions # 2.72 insn per cycle - 6.025879163 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 639) (avx2: 0) (512y: 0) (512z: 0) + 18,606,289,146 cycles # 3.016 GHz + 50,188,372,015 instructions # 2.70 insn per cycle + 6.169438178 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.193122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.346686e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.346686e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.098336e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.247173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.247173e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.392949 sec +TOTAL : 3.523816 sec INFO: No Floating Point Exceptions have been reported - 10,374,405,143 cycles # 3.054 GHz - 29,167,609,202 instructions # 2.81 insn per cycle - 3.398383511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2747) (avx2: 0) (512y: 0) (512z: 0) + 10,442,361,179 cycles # 2.956 GHz + 29,279,251,351 instructions # 2.80 insn per cycle + 3.532990329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.537528e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.839673e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.839673e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.443138e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.746940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.746940e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.418900 sec +TOTAL : 2.497852 sec INFO: No Floating Point Exceptions have been reported - 6,949,645,422 cycles # 2.868 GHz - 15,150,928,033 instructions # 2.18 insn per cycle - 2.424241351 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3032) (512y: 0) (512z: 0) + 7,066,085,833 cycles # 2.820 GHz + 15,266,746,500 instructions # 2.16 insn per 
cycle + 2.506843234 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3014) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.710563e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.035488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.035488e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.619490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.939857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.939857e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.334255 sec +TOTAL : 2.408665 sec INFO: No Floating Point Exceptions have been reported - 6,712,488,345 cycles # 2.870 GHz - 14,622,404,295 instructions # 2.18 insn per cycle - 2.339601480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2634) 
(512y: 302) (512z: 0) + 6,801,023,817 cycles # 2.814 GHz + 14,741,025,083 instructions # 2.17 insn per cycle + 2.418105582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2610) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.519771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.696832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.696832e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.467108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.646231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.646231e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.089786 sec +TOTAL : 3.162174 sec INFO: No Floating Point Exceptions have been reported - 6,048,674,512 cycles # 1.955 GHz - 10,341,337,272 instructions # 1.71 
insn per cycle - 3.095220098 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1280) (512y: 214) (512z: 2129) + 6,163,693,414 cycles # 1.944 GHz + 10,458,436,313 instructions # 1.70 insn per cycle + 3.171538437 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1256) (512y: 214) (512z: 2129) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 2f4b520747..f2fae03e6f 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-06-28_21:13:02 +DATE: 2024-08-08_20:40:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.117967e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.014428e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.165976e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.265904e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.014084e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164702e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.475599 sec +TOTAL : 0.479298 sec INFO: No Floating Point Exceptions have been reported - 2,098,451,460 cycles # 2.983 GHz - 2,991,055,796 instructions # 1.43 insn per cycle - 0.760185973 seconds time elapsed + 2,081,740,099 cycles # 2.923 GHz + 2,980,788,530 instructions # 1.43 insn per cycle + 0.769444492 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 157 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.731627e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.773602e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.773602e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.729175e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.771417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.771417e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.141422 sec +TOTAL : 6.156936 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,599,220,732 cycles # 3.027 GHz - 51,234,674,126 instructions # 2.75 insn per cycle - 6.146514341 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) + 18,595,330,502 cycles # 3.018 GHz + 51,251,959,778 instructions # 2.76 insn per cycle + 6.163337596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.173906e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.454050e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.454050e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.099341e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.368380e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.368380e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.599505 sec +TOTAL : 2.652061 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,952,125,495 cycles # 3.054 GHz - 19,320,215,609 instructions # 2.43 insn per cycle - 2.604647260 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3555) (avx2: 0) (512y: 0) (512z: 0) + 7,973,155,362 cycles # 3.000 GHz + 19,354,832,142 instructions # 2.43 insn per cycle + 2.658432650 seconds time elapsed +=Symbols 
in CPPProcess_cpp.o= (~sse4: 3543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -140,20 +144,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.227356e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.294161e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.294161e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.856741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.854878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.854878e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.359948 sec +TOTAL : 1.428829 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,954,758,474 cycles # 2.898 GHz - 8,835,602,425 instructions # 2.23 insn per cycle - 1.365174055 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3719) (512y: 
0) (512z: 0) + 4,050,150,212 cycles # 2.823 GHz + 8,874,617,638 instructions # 2.19 insn per cycle + 1.435345706 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3701) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -168,20 +173,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.666240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.854739e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.854739e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.579308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.783002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.783002e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.294891 sec +TOTAL : 1.316483 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,727,185,027 cycles # 2.869 GHz - 8,438,200,999 instructions 
# 2.26 insn per cycle - 1.300078595 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3555) (512y: 20) (512z: 0) + 3,770,202,308 cycles # 2.852 GHz + 8,473,429,912 instructions # 2.25 insn per cycle + 1.322971561 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3531) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -196,20 +202,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.299305e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.889301e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.889301e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.340113e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.941423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.941423e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.751707 sec +TOTAL : 1.746808 sec INFO: The following 
Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,532,326,839 cycles # 2.012 GHz - 6,249,861,118 instructions # 1.77 insn per cycle - 1.756948421 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2453) (512y: 32) (512z: 2288) + 3,535,492,788 cycles # 2.017 GHz + 6,276,858,891 instructions # 1.78 insn per cycle + 1.753255052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2373) (512y: 24) (512z: 2288) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 0a5421b6bf..0a0273143f 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-06-28_21:13:23 +DATE: 2024-08-08_20:40:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.138769e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.031502e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.196830e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.367628e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.048579e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.197733e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.482359 sec +TOTAL : 0.477604 sec INFO: No Floating Point Exceptions have been reported - 2,078,305,223 cycles # 2.945 GHz - 2,939,956,772 instructions # 1.41 insn per cycle - 0.764061380 seconds time elapsed + 2,076,219,464 cycles # 2.927 GHz + 2,975,745,460 instructions # 1.43 insn per cycle + 0.766187526 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.770832e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.814865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814865e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.736285e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.779068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.779068e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.006723 sec +TOTAL : 6.132525 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,001,534,135 cycles # 2.995 GHz - 49,621,174,694 instructions # 2.76 insn per cycle - 6.011974065 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) + 18,052,449,940 cycles # 2.941 GHz + 49,636,091,735 instructions # 2.75 insn per cycle + 6.138910377 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.694395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.041943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.041943e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.614737e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.962775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.962775e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.321218 sec +TOTAL : 2.366728 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,097,809,138 cycles # 3.052 GHz - 18,485,382,753 instructions # 2.60 insn per cycle - 2.326522398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3247) (avx2: 0) (512y: 0) (512z: 0) + 7,117,859,932 cycles # 3.001 GHz + 18,522,428,859 instructions # 2.60 insn per cycle + 2.373189090 seconds time elapsed +=Symbols 
in CPPProcess_cpp.o= (~sse4: 3235) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -140,20 +144,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.589453e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.061103e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.061103e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.520738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.991057e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.991057e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.962194 sec +TOTAL : 1.992175 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,644,401,281 cycles # 2.870 GHz - 10,850,847,216 instructions # 1.92 insn per cycle - 1.967315204 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4278) (512y: 
0) (512z: 0) + 5,687,734,724 cycles # 2.847 GHz + 10,882,767,796 instructions # 1.91 insn per cycle + 1.998751657 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4260) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -170,20 +175,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.669808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.159078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.159078e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.605855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.093953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.093953e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.936979 sec +TOTAL : 1.963543 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,562,205,138 cycles # 2.865 GHz - 
10,548,817,439 instructions # 1.90 insn per cycle - 1.942344639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4147) (512y: 12) (512z: 0) + 5,605,481,105 cycles # 2.846 GHz + 10,580,081,810 instructions # 1.89 insn per cycle + 1.969981859 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4123) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -200,20 +206,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.667808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.981925e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.981925e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.560324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.865892e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.865892e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 
2.332449 sec +TOTAL : 2.392840 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,630,982,511 cycles # 1.982 GHz - 8,663,797,017 instructions # 1.87 insn per cycle - 2.337797442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2929) (512y: 8) (512z: 2883) + 4,694,796,569 cycles # 1.957 GHz + 8,695,099,464 instructions # 1.85 insn per cycle + 2.399389128 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2849) (512y: 0) (512z: 2883) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 4f10d746fb..62d3c322fa 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' 
HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-06-28_21:13:46 +DATE: 2024-08-08_20:41:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.884315e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.101968e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186528e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.961744e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.101148e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.184921e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.518718 sec +TOTAL : 0.519363 sec INFO: No Floating Point Exceptions have been reported - 2,224,545,388 cycles # 2.964 GHz - 3,211,728,542 instructions # 1.44 insn per cycle - 0.807600897 seconds time elapsed + 2,191,794,568 cycles # 2.919 GHz + 3,157,238,703 instructions # 1.44 insn per cycle + 0.807852407 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.579495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.613967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.613967e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.547380e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.581051e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.581051e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.750243 sec +TOTAL : 6.917943 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 20,483,878,120 cycles # 3.033 GHz - 51,951,197,139 instructions # 2.54 insn per cycle - 6.755548112 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) + 20,590,059,617 cycles # 2.973 GHz + 52,050,938,989 instructions # 2.53 insn per cycle + 6.927193752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.806286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.923526e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.923526e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.762310e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.879212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.879212e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.845987 sec +TOTAL : 3.935303 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,542,409,923 cycles # 2.998 GHz - 30,594,439,566 instructions # 2.65 insn per cycle - 3.851470148 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2982) (avx2: 0) (512y: 0) (512z: 0) + 11,659,111,162 
cycles # 2.956 GHz + 30,715,351,599 instructions # 2.63 insn per cycle + 3.944612578 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -140,20 +144,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.722824e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.056906e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.056906e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.631108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.954751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.954751e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.331102 sec +TOTAL : 2.401648 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,688,713,601 cycles # 2.864 GHz - 13,612,289,001 instructions # 
2.04 insn per cycle - 2.336516500 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3124) (512y: 0) (512z: 0) + 6,824,462,536 cycles # 2.832 GHz + 13,725,309,322 instructions # 2.01 insn per cycle + 2.410817230 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3106) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -170,20 +175,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.154620e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.545538e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.545538e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.105035e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.496184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.496184e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.192487 sec +TOTAL : 2.189054 sec 
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,160,306,610 cycles # 2.843 GHz - 12,983,189,743 instructions # 2.11 insn per cycle - 2.199017366 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2863) (512y: 150) (512z: 0) + 6,256,988,161 cycles # 2.848 GHz + 13,091,196,075 instructions # 2.09 insn per cycle + 2.197929864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2839) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -200,20 +206,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.308189e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.463723e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.463723e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.274756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.429596e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.429596e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.278659 sec +TOTAL : 3.340001 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,408,069,755 cycles # 1.952 GHz - 8,704,551,378 instructions # 1.36 insn per cycle - 3.284062400 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1815) (512y: 134) (512z: 2012) + 6,530,704,290 cycles # 1.951 GHz + 8,820,931,604 instructions # 1.35 insn per cycle + 3.348983212 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1769) (512y: 130) (512z: 2012) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 0521944df3..8f692fc05c 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-06-28_21:14:12 +DATE: 2024-08-08_20:41:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.848166e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.099691e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185849e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.985439e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.104211e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186889e+08 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.523260 sec +TOTAL : 0.520398 sec INFO: No Floating Point Exceptions have been reported - 2,233,257,640 cycles # 2.961 GHz - 3,232,811,415 instructions # 1.45 insn per cycle - 0.813371271 seconds time elapsed + 2,215,259,816 cycles # 2.943 GHz + 3,181,112,910 instructions # 1.44 insn per cycle + 0.810106845 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.667950e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.705744e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.705744e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.642914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.679857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.679857e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.397356 sec +TOTAL : 6.520897 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 19,513,033,741 cycles # 3.048 GHz - 49,980,418,654 instructions # 2.56 insn per cycle - 6.402937387 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 612) (avx2: 0) (512y: 0) (512z: 0) + 19,742,813,002 cycles # 3.024 GHz + 50,090,585,504 instructions # 2.54 insn per cycle + 6.530114912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -110,20 +113,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.929554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.057191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.057191e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.996801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.132711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.132711e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.687798 sec +TOTAL : 3.635789 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 10,975,545,528 cycles # 2.972 GHz - 29,099,576,131 instructions # 2.65 insn per cycle - 3.693239239 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2818) (avx2: 0) (512y: 0) (512z: 0) + 11,015,177,767 cycles # 3.023 GHz + 29,218,453,275 instructions # 2.65 insn per cycle + 3.644811061 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 2806) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -140,20 +144,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.763591e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.971776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.971776e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.818882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.034730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.034730e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.898131 sec +TOTAL : 2.883629 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,065,988,902 cycles # 2.779 GHz - 15,176,934,150 instructions # 1.88 insn per cycle - 2.903385335 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
3208) (512y: 0) (512z: 0) + 8,167,532,623 cycles # 2.824 GHz + 15,289,290,626 instructions # 1.87 insn per cycle + 2.892785978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3190) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -170,20 +175,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.100432e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.344362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.344362e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.019354e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.261718e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.261718e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.667556 sec +TOTAL : 2.748891 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,691,086,412 cycles # 
2.878 GHz - 14,485,100,498 instructions # 1.88 insn per cycle - 2.672916815 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2786) (512y: 304) (512z: 0) + 7,796,139,330 cycles # 2.827 GHz + 14,598,894,712 instructions # 1.87 insn per cycle + 2.758146376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2762) (512y: 304) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -200,20 +206,21 @@ Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.246284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.395334e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.395334e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.130478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.273768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.273768e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) 
GeV^0 -TOTAL : 3.338576 sec +TOTAL : 3.488340 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,541,048,191 cycles # 1.957 GHz - 9,899,451,402 instructions # 1.51 insn per cycle - 3.343805399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1588) (512y: 220) (512z: 2216) + 6,648,747,235 cycles # 1.902 GHz + 10,013,894,735 instructions # 1.51 insn per cycle + 3.497416797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1542) (512y: 216) (512z: 2216) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 90c6b8c61e..ad80cd52ba 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-06-28_21:11:10 +DATE: 2024-08-08_20:38:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.193723e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.215926e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.219404e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.191569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.214197e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.217917e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.461231 sec +TOTAL : 0.458797 sec INFO: No Floating Point Exceptions have been reported - 2,031,756,874 cycles # 2.969 GHz - 2,962,940,364 instructions # 1.46 insn per cycle - 0.741692237 seconds time elapsed + 1,983,013,526 cycles # 2.927 GHz + 2,898,600,678 instructions # 1.46 insn per cycle + 0.735167670 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.824383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.989844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.000074e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.853741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.992878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.001850e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.477201 sec +TOTAL : 0.478795 sec INFO: No Floating Point Exceptions have been reported - 2,063,797,965 cycles # 2.957 GHz - 3,051,978,094 instructions # 1.48 insn per cycle - 0.754833361 seconds time elapsed + 2,032,935,359 cycles # 2.895 GHz + 3,002,750,539 instructions # 1.48 insn per cycle + 0.759651454 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.546074e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.549505e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.549505e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.535539e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.539012e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.539012e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.154149 sec +TOTAL : 0.151546 sec INFO: No Floating Point Exceptions have been reported - 476,458,242 cycles # 3.028 GHz - 1,396,962,991 instructions # 2.93 insn per cycle - 0.158033335 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3921) (avx2: 0) (512y: 0) (512z: 0) + 468,124,472 cycles # 3.026 GHz + 1,389,955,355 instructions # 2.97 insn per cycle + 0.155210727 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.769944e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.782647e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.782647e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.637495e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.649053e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.649053e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.082624 sec +TOTAL : 0.081392 sec INFO: No Floating Point Exceptions have been reported - 247,067,561 cycles # 2.873 GHz - 700,108,508 instructions # 2.83 insn per cycle - 0.086505657 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9495) (avx2: 0) (512y: 0) (512z: 0) + 240,371,597 cycles # 2.843 GHz + 693,129,674 instructions # 2.88 insn per cycle + 0.085091876 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.495648e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.501499e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.501499e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.470591e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.476735e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.476735e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.040068 sec +TOTAL : 0.038239 sec INFO: No Floating Point Exceptions have been reported - 121,183,823 cycles # 2.800 GHz - 265,080,473 instructions # 2.19 insn per cycle - 0.043900082 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8514) (512y: 0) (512z: 0) + 114,892,967 cycles # 2.759 GHz + 258,045,984 instructions # 2.25 insn per cycle + 0.042251807 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8496) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.687162e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.695060e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.695060e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.699002e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.707705e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.707705e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.036254 sec +TOTAL : 0.033054 sec INFO: No Floating Point Exceptions have been reported - 109,659,099 cycles # 2.745 GHz - 247,206,412 instructions # 2.25 insn per cycle - 0.040604680 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8157) (512y: 150) (512z: 0) + 102,370,235 cycles # 2.829 GHz + 240,205,792 instructions # 2.35 insn per cycle + 0.036714327 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 8133) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.223877e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.228733e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.228733e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.284659e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.290558e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.290558e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.048129 sec +TOTAL : 0.043329 sec INFO: No Floating Point Exceptions have been reported - 97,361,748 cycles # 1.896 GHz - 141,545,344 instructions # 1.45 insn per cycle - 0.051936443 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1955) (512y: 126) (512z: 7089) + 89,664,319 cycles # 1.930 GHz + 
134,445,525 instructions # 1.50 insn per cycle + 0.047102954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1931) (512y: 126) (512z: 7089) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 13930c5a4b..ce829c6200 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-06-28_21:11:21 +DATE: 2024-08-08_20:38:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.238168e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.265600e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.269385e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.249020e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.272842e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.276725e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.464107 sec +TOTAL : 0.461905 sec INFO: No Floating Point Exceptions have been reported - 1,996,708,175 cycles # 2.888 GHz - 2,944,414,956 instructions # 1.47 insn per cycle - 0.748472969 seconds time elapsed + 2,018,577,231 cycles # 2.927 GHz + 2,882,435,680 instructions # 1.43 insn per cycle + 0.748301491 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.940461e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.101663e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.112629e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.955136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.095621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.108051e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.480375 sec +TOTAL : 0.478584 sec INFO: No Floating Point Exceptions have been reported - 2,101,513,196 cycles # 2.947 GHz - 3,103,578,303 instructions # 1.48 insn per cycle - 0.770268851 seconds time elapsed + 2,069,849,202 cycles # 2.946 GHz + 3,022,582,128 instructions # 1.46 insn per cycle + 0.760103886 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.331366e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.334564e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.334564e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.498608e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502028e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.502028e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.163520 sec +TOTAL : 0.152353 sec INFO: No Floating Point Exceptions have been reported - 475,742,235 cycles # 2.848 GHz - 1,392,453,483 instructions # 2.93 insn per cycle - 0.167648338 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3809) (avx2: 0) (512y: 0) (512z: 0) + 465,735,866 cycles # 2.994 GHz + 1,385,207,858 instructions # 2.97 insn per cycle + 0.156142730 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.895678e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.906547e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.906547e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.699480e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.712661e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.712661e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.093681 sec +TOTAL : 0.080022 sec INFO: No Floating Point Exceptions have been reported - 247,202,379 cycles # 2.547 GHz - 696,396,818 instructions # 2.82 insn per cycle - 0.097666857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9540) (avx2: 0) (512y: 0) (512z: 0) + 238,839,052 cycles # 2.875 GHz + 689,228,820 instructions # 2.89 insn per cycle + 0.083649102 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.477425e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.483400e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.483400e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.515936e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.522249e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.522249e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.039763 sec +TOTAL : 0.036065 sec INFO: No Floating Point Exceptions have been reported - 119,037,303 cycles # 2.761 GHz - 260,641,164 instructions # 2.19 insn per cycle - 0.043647510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8469) (512y: 0) (512z: 0) + 111,582,476 cycles # 2.848 GHz + 253,551,951 instructions # 2.27 insn per cycle + 0.039739897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8451) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.633386e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.640521e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.640521e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.680034e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.687653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687653e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.036416 sec +TOTAL : 0.032732 sec INFO: No Floating Point Exceptions have been reported - 107,502,149 cycles # 2.701 GHz - 242,803,467 instructions # 2.26 insn per cycle - 0.040331301 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8115) (512y: 150) (512z: 0) + 100,255,842 cycles # 2.793 GHz + 235,731,789 instructions # 2.35 insn per cycle + 0.036414093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 8091) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.272818e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.278041e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.278041e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.271489e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.276895e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276895e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.045889 sec +TOTAL : 0.042973 sec INFO: No Floating Point Exceptions have been reported - 94,930,532 cycles # 1.931 GHz - 136,876,173 instructions # 1.44 insn per cycle - 0.049671016 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1911) (512y: 126) (512z: 7093) + 87,728,536 cycles # 1.900 GHz + 
129,884,935 instructions # 1.48 insn per cycle + 0.046739732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1887) (512y: 126) (512z: 7093) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index e0238fd00f..3f66e78e98 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-06-28_21:11:31 +DATE: 2024-08-08_20:38:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.451775e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.461985e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.464542e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.450134e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.460503e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.463108e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.464407 sec +TOTAL : 0.461786 sec INFO: No Floating Point Exceptions have been reported - 2,018,377,870 cycles # 2.962 GHz - 2,930,359,221 instructions # 1.45 insn per cycle - 0.738289299 seconds time elapsed + 1,983,576,716 cycles # 2.936 GHz + 2,917,710,082 instructions # 1.47 insn per cycle + 0.732112148 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.084747e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.210078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.222065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.144453e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.248650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.259538e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 -TOTAL : 0.466023 sec +TOTAL : 0.468413 sec INFO: No Floating Point Exceptions have been reported - 2,038,104,278 cycles # 2.956 GHz - 2,947,994,249 instructions # 1.45 insn per cycle - 0.746657233 seconds time elapsed + 2,017,794,611 cycles # 2.933 GHz + 2,930,677,889 instructions # 1.45 insn per cycle + 0.746841147 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.615276e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.618884e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.618884e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.555756e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.559328e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.559328e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.151078 sec +TOTAL : 0.150880 sec INFO: No Floating Point Exceptions have been reported - 471,303,235 cycles # 3.055 GHz - 1,389,260,982 instructions # 2.95 insn per cycle - 0.154884128 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3071) (avx2: 0) (512y: 0) (512z: 0) + 463,646,900 cycles # 3.010 GHz + 1,382,054,083 instructions # 2.98 insn per cycle + 0.154571759 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.276596e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.281299e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281299e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.231675e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.235936e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235936e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.046101 sec +TOTAL : 0.044706 sec INFO: No Floating Point Exceptions have been reported - 140,089,332 cycles # 2.837 GHz - 379,194,848 instructions # 2.71 insn per cycle - 0.050025437 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10152) (avx2: 0) (512y: 0) (512z: 0) + 132,862,579 cycles # 2.773 GHz + 372,176,524 instructions # 2.80 insn per cycle + 0.048442327 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10140) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.878046e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.901759e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.901759e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.891678e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.915961e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.915961e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.023061 sec +TOTAL : 0.020296 sec INFO: No Floating Point Exceptions have been reported - 72,082,912 cycles # 2.736 GHz - 149,966,673 instructions # 2.08 insn per cycle - 0.026964267 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9255) (512y: 0) (512z: 0) + 65,005,087 cycles # 2.776 GHz + 142,918,773 instructions # 2.20 insn per cycle + 0.023971535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.253397e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.283698e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.283698e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.201047e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.231393e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.231393e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020929 sec +TOTAL : 0.018450 sec INFO: No Floating Point Exceptions have been reported - 66,702,269 cycles # 2.763 GHz - 139,955,505 instructions # 2.10 insn per cycle - 0.024643778 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8975) (512y: 28) (512z: 0) + 59,790,078 cycles # 2.765 GHz + 132,888,839 instructions # 2.22 insn per cycle + 0.022153075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 8951) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.540853e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.563913e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.563913e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.264475e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.284066e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.284066e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.025752 sec +TOTAL : 0.025826 sec INFO: No Floating Point Exceptions have been reported - 59,485,252 cycles # 2.054 GHz - 86,826,640 instructions # 1.46 insn per cycle - 0.029598069 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2837) (512y: 32) (512z: 7440) + 53,398,285 cycles # 1.814 GHz + 
80,038,410 instructions # 1.50 insn per cycle + 0.029948894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2813) (512y: 32) (512z: 7440) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 82cc1b864e..c0ec66c0e5 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-06-28_21:11:41 +DATE: 2024-08-08_20:39:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.484339e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.495870e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.498733e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.475468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.488915e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.493523e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.466683 sec +TOTAL : 0.466666 sec INFO: No Floating Point Exceptions have been reported - 2,040,356,614 cycles # 2.967 GHz - 2,942,177,277 instructions # 1.44 insn per cycle - 0.745417107 seconds time elapsed + 2,035,784,320 cycles # 2.932 GHz + 2,916,651,120 instructions # 1.43 insn per cycle + 0.752059618 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.335689e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.433529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.444866e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.233883e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.341900e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.353294e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.465336 sec +TOTAL : 0.467271 sec INFO: No Floating Point Exceptions have been reported - 2,039,216,026 cycles # 2.964 GHz - 2,973,292,818 instructions # 1.46 insn per cycle - 0.744817103 seconds time elapsed + 2,037,159,179 cycles # 2.946 GHz + 2,882,523,885 instructions # 1.41 insn per cycle + 0.747816184 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.582023e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.585808e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.585808e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.551604e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.554949e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.554949e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.152028 sec +TOTAL : 0.149984 sec INFO: No Floating Point Exceptions have been reported - 468,837,171 cycles # 3.022 GHz - 1,384,022,139 instructions # 2.95 insn per cycle - 0.155930676 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2943) (avx2: 0) (512y: 0) (512z: 0) + 461,532,447 cycles # 3.013 GHz + 1,376,849,888 instructions # 2.98 insn per cycle + 0.153697004 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.265822e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270876e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270876e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.248118e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.252450e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.252450e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.045875 sec +TOTAL : 0.043499 sec INFO: No Floating Point Exceptions have been reported - 137,352,487 cycles # 2.799 GHz - 374,377,520 instructions # 2.73 insn per cycle - 0.049677166 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10135) (avx2: 0) (512y: 0) (512z: 0) + 130,431,744 cycles # 2.801 GHz + 367,402,317 instructions # 2.82 insn per cycle + 0.047010449 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10123) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.841707e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.865345e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.865345e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.883527e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.907714e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.907714e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.022453 sec +TOTAL : 0.019514 sec INFO: No Floating Point Exceptions have been reported - 70,160,896 cycles # 2.725 GHz - 145,241,864 instructions # 2.07 insn per cycle - 0.026324679 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9209) (512y: 0) (512z: 0) + 62,991,896 cycles # 2.777 GHz + 138,167,276 instructions # 2.19 insn per cycle + 0.023246200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9191) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.165204e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.204619e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.204619e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.044826e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.071557e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.071557e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020717 sec +TOTAL : 0.018654 sec INFO: No Floating Point Exceptions have been reported - 64,835,417 cycles # 2.707 GHz - 135,143,508 instructions # 2.08 insn per cycle - 0.024535534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8931) (512y: 28) (512z: 0) + 57,917,940 cycles # 2.662 GHz + 128,096,344 instructions # 2.21 insn per cycle + 0.022204337 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 8907) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.455631e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.477792e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.477792e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.471457e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.494959e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.494959e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.025693 sec +TOTAL : 0.022784 sec INFO: No Floating Point Exceptions have been reported - 57,629,979 cycles # 1.987 GHz - 82,051,182 instructions # 1.42 insn per cycle - 0.029515961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2792) (512y: 32) (512z: 7442) + 50,131,984 cycles # 1.927 GHz + 
74,930,459 instructions # 1.49 insn per cycle + 0.026643138 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2768) (512y: 32) (512z: 7442) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index ab9c454944..a1cf964e05 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-06-28_21:11:51 +DATE: 2024-08-08_20:39:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.175250e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.198055e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.201847e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.170281e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.193514e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.197230e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.461570 sec +TOTAL : 0.460249 sec INFO: No Floating Point Exceptions have been reported - 2,036,984,155 cycles # 2.947 GHz - 2,961,824,334 instructions # 1.45 insn per cycle - 0.749627742 seconds time elapsed + 1,998,727,826 cycles # 2.929 GHz + 2,887,597,557 instructions # 1.44 insn per cycle + 0.739044353 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.826361e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.956648e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.966084e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.840436e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.977655e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.986488e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.478628 sec +TOTAL : 0.480871 sec INFO: No Floating Point Exceptions have been reported - 2,094,008,815 cycles # 2.983 GHz - 3,111,493,141 instructions # 1.49 insn per cycle - 0.759239035 seconds time elapsed + 2,091,938,823 cycles # 2.936 GHz + 3,079,530,757 instructions # 1.47 insn per cycle + 0.770600295 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.466352e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.469566e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.469566e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.326264e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.329481e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.329481e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.157420 sec +TOTAL : 0.161027 sec INFO: No Floating Point Exceptions have been reported - 479,914,950 cycles # 2.988 GHz - 1,405,491,495 instructions # 2.93 insn per cycle - 0.161161459 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3912) (avx2: 0) (512y: 0) (512z: 0) + 471,923,848 cycles # 2.871 GHz + 1,398,593,986 instructions # 2.96 insn per cycle + 0.164917375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.853341e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.865812e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.865812e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.833451e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.846029e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.846029e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.081922 sec +TOTAL : 0.079301 sec INFO: No Floating Point Exceptions have been reported - 243,063,658 cycles # 2.855 GHz - 695,298,142 instructions # 2.86 insn per cycle - 0.085797726 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9339) (avx2: 0) (512y: 0) (512z: 0) + 236,478,249 cycles # 2.865 GHz + 688,183,765 instructions # 2.91 insn per cycle + 0.083009452 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9327) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.352946e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358722e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358722e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.464519e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.470938e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.470938e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.043797 sec +TOTAL : 0.038027 sec INFO: No Floating Point Exceptions have been reported - 121,157,463 cycles # 2.599 GHz - 260,506,764 instructions # 2.15 insn per cycle - 0.047884491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8369) (512y: 0) (512z: 0) + 113,380,965 cycles # 2.745 GHz + 253,222,188 instructions # 2.23 insn per cycle + 0.041829832 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8351) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.602254e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.609503e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.609503e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.697656e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.705927e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.705927e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037787 sec +TOTAL : 0.033099 sec INFO: No Floating Point Exceptions have been reported - 108,207,768 cycles # 2.631 GHz - 240,816,726 instructions # 2.23 insn per cycle - 0.041615626 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7513) (512y: 146) (512z: 0) + 100,842,922 cycles # 2.776 GHz + 233,742,979 instructions # 2.32 insn per cycle + 0.036790218 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 7489) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.262123e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.267316e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267316e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.224753e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229606e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229606e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046689 sec +TOTAL : 0.045294 sec INFO: No Floating Point Exceptions have been reported - 98,110,559 cycles # 1.961 GHz - 140,335,875 instructions # 1.43 insn per cycle - 0.050565799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2085) (512y: 122) (512z: 6355) + 90,903,043 cycles # 1.874 GHz + 
133,303,472 instructions # 1.47 insn per cycle + 0.049138947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 122) (512z: 6355) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index dbb1d046cf..e66260167e 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-06-28_21:12:01 +DATE: 2024-08-08_20:39:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.223463e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.251769e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.255527e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.209121e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.235715e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.239868e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.462071 sec +TOTAL : 0.460488 sec INFO: No Floating Point Exceptions have been reported - 2,027,925,192 cycles # 2.945 GHz - 2,938,582,017 instructions # 1.45 insn per cycle - 0.747329246 seconds time elapsed + 1,999,748,612 cycles # 2.928 GHz + 2,930,247,263 instructions # 1.47 insn per cycle + 0.740595703 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -67,18 +67,20 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.920161e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.062136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.071425e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.929472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.072806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.082157e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.478582 sec +TOTAL : 0.482161 sec INFO: No Floating Point Exceptions have been reported - 2,106,891,234 cycles # 2.952 GHz - 3,109,718,200 instructions # 1.48 insn per cycle - 0.770328620 seconds time elapsed + 2,061,793,455 cycles # 2.911 GHz + 3,015,555,211 instructions # 1.46 insn per cycle + 0.766758571 seconds time elapsed ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. 
+INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -95,20 +97,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.553282e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.556636e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.556636e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.493942e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497215e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497215e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.153094 sec +TOTAL : 0.152521 sec INFO: No Floating Point Exceptions have been reported - 476,099,272 cycles # 3.050 GHz - 1,401,016,604 instructions # 2.94 insn per cycle - 0.156845379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3813) (avx2: 0) (512y: 0) (512z: 0) + 469,652,977 cycles # 3.017 GHz + 1,393,890,707 instructions # 2.97 insn per cycle + 0.156209215 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -123,20 +126,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.000972e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.014214e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.014214e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.875866e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.888668e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.888668e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.079486 sec +TOTAL : 0.077991 sec INFO: No Floating Point Exceptions have been reported - 242,214,106 cycles # 2.928 GHz - 691,275,602 instructions # 2.85 insn per cycle - 0.083233014 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9372) (avx2: 0) (512y: 0) (512z: 0) + 235,131,903 cycles # 2.896 GHz + 684,356,235 instructions # 2.91 insn per cycle + 0.081716900 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -151,20 +155,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.463005e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.469289e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.469289e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.472431e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.478529e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.478529e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.040105 sec +TOTAL : 0.037179 sec INFO: No Floating Point Exceptions have been reported - 118,533,316 cycles # 2.738 GHz - 255,884,042 instructions # 2.16 insn per cycle - 0.043890674 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8322) (512y: 0) (512z: 0) + 111,325,082 cycles # 2.760 GHz + 248,775,647 instructions # 2.23 insn per cycle + 0.040876097 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8304) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -179,20 +184,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.626081e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.633847e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.633847e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.697458e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.705090e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.705090e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.036586 sec +TOTAL : 0.032417 sec INFO: No Floating Point Exceptions have been reported - 106,383,966 cycles # 2.677 GHz - 236,394,538 instructions # 2.22 insn per cycle - 0.040350677 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7464) (512y: 146) (512z: 0) + 98,963,466 cycles # 2.782 GHz + 229,303,120 instructions # 2.32 insn per cycle + 0.036104618 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 7440) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -207,20 +213,21 @@ Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.248196e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.253173e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.253173e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.256457e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.261478e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.261478e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046442 sec +TOTAL : 0.043443 sec INFO: No Floating Point Exceptions have been reported - 95,760,022 cycles # 1.927 GHz - 135,734,849 instructions # 1.42 insn per cycle - 0.050265152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 122) (512z: 6355) + 88,868,110 cycles # 1.900 GHz + 
128,801,312 instructions # 1.45 insn per cycle + 0.047318950 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2012) (512y: 122) (512z: 6355) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 60a21f7044..ef58048b29 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-07-23_17:01:29 +DATE: 2024-08-08_20:37:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.420342e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.313282e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.381713e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.665934e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.063349e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.406343e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.522832 sec +TOTAL : 0.506392 sec INFO: No Floating Point Exceptions have been reported - 2,278,234,455 cycles # 2.959 GHz - 3,205,581,944 instructions # 1.41 insn per cycle - 0.827280417 seconds time elapsed + 2,172,824,039 cycles # 2.952 GHz + 3,090,027,466 instructions # 1.42 insn per cycle + 0.793282296 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -82,15 +82,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.921532e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.019329e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.019329e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.134117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048218e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.048218e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.289962 sec +TOTAL : 1.290483 sec INFO: No Floating Point Exceptions have been reported - 3,730,964,441 cycles # 2.882 GHz - 9,721,293,781 instructions # 2.61 insn per cycle - 1.295775979 seconds time elapsed + 3,847,248,044 cycles # 2.962 GHz + 9,842,303,730 instructions # 2.56 insn per cycle + 1.299592545 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 338) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe @@ -111,15 +111,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.609878e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.093510e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.093510e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.531336e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978158e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.762977 sec +TOTAL : 0.826770 sec INFO: No Floating Point Exceptions have been reported - 2,324,074,316 cycles # 3.026 GHz - 5,927,719,424 instructions # 2.55 insn per cycle - 0.768610956 seconds time elapsed + 2,453,692,398 cycles # 2.938 GHz + 6,052,098,536 instructions # 2.47 insn per cycle + 0.835919362 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1376) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe @@ -140,15 +140,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.298759e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.386451e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386451e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.266889e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.345995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.345995e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.566149 sec +TOTAL : 0.606570 sec INFO: No Floating Point Exceptions have been reported - 1,665,204,872 cycles # 2.915 GHz - 3,310,874,764 instructions # 1.99 insn per cycle - 0.571828050 seconds time elapsed + 1,785,899,086 cycles # 2.902 GHz + 3,437,083,551 instructions # 1.92 insn per cycle + 0.616030368 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1492) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe @@ -169,15 +169,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.374476e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.525659e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.525659e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
2.357485e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522198e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.552533 sec +TOTAL : 0.586533 sec INFO: No Floating Point Exceptions have been reported - 1,626,012,950 cycles # 2.916 GHz - 3,280,880,649 instructions # 2.02 insn per cycle - 0.558264683 seconds time elapsed + 1,741,529,265 cycles # 2.926 GHz + 3,407,397,649 instructions # 1.96 insn per cycle + 0.595838672 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1368) (512y: 96) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe @@ -198,15 +198,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.265036e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.279394e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.279394e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.227600e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.220282e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.220282e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.573090 sec +TOTAL : 0.613174 sec INFO: No Floating Point Exceptions have been reported - 1,367,296,463 cycles # 2.365 GHz - 2,420,374,484 instructions # 1.77 insn per cycle - 0.578843377 seconds time elapsed + 1,478,751,325 cycles # 2.377 GHz + 2,546,932,482 instructions # 1.72 insn per cycle + 0.622601431 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 568) (512y: 60) (512z: 1020) ------------------------------------------------------------------------- runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index c85a2ac0a9..8c70303d63 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-07-23_17:01:41 +DATE: 2024-08-08_20:37:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.466951e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.767444e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.772228e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.814897e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.661637e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.796070e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.529266 sec +TOTAL : 0.507946 sec INFO: No Floating Point Exceptions have been reported - 2,231,633,624 cycles # 2.901 GHz - 3,111,971,882 instructions # 1.39 insn per cycle - 0.827158513 seconds time elapsed + 2,214,460,924 cycles # 2.958 GHz + 3,109,800,964 instructions # 1.40 insn per cycle + 0.807528636 
seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -82,15 +82,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.499434e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.087743e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.087743e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.340535e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.067339e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.067339e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.213256 sec +TOTAL : 1.264960 sec INFO: No Floating Point Exceptions have been reported - 3,719,151,096 cycles # 3.053 GHz - 9,602,056,797 instructions # 2.58 insn per cycle - 1.218964071 seconds time elapsed + 3,833,057,387 cycles # 3.009 GHz + 9,733,259,839 instructions # 2.54 insn per cycle + 1.274559461 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 356) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe @@ -111,15 +111,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.600652e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.094506e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.094506e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.542135e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.989720e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989720e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.765607 sec +TOTAL : 0.822438 sec INFO: No Floating Point Exceptions have been reported - 2,343,304,789 cycles # 3.043 GHz - 5,873,196,914 instructions # 2.51 insn per cycle - 0.771257250 seconds time elapsed + 2,444,623,828 cycles # 2.942 GHz + 6,004,739,844 instructions # 2.46 insn per cycle + 0.831745892 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1342) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe @@ -140,15 +140,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.294689e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.380462e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.380462e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.232544e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.257016e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.257016e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.566032 sec +TOTAL : 0.613019 sec INFO: No Floating Point Exceptions have been reported - 1,659,009,853 cycles # 2.906 GHz - 3,283,617,543 instructions # 1.98 insn per cycle - 0.571506758 seconds time elapsed + 1,777,339,853 cycles # 2.859 GHz + 3,416,813,174 instructions # 1.92 insn per cycle + 0.622385987 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1429) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe @@ -169,15 +169,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.264254e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.341747e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.341747e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.366185e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.542246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542246e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.577127 sec +TOTAL : 0.584170 sec INFO: No Floating Point Exceptions have been reported - 1,625,973,657 cycles # 2.793 GHz - 3,257,709,469 instructions # 2.00 insn per cycle - 0.582960679 seconds time elapsed + 1,729,011,734 cycles # 2.917 GHz + 3,386,515,960 instructions # 1.96 insn per cycle + 0.593372914 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1321) (512y: 96) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe @@ -198,15 +198,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.108767e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.019417e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.019417e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.212793e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.204561e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.204561e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.610248 sec +TOTAL : 0.617575 sec INFO: No Floating Point Exceptions have been reported - 1,368,423,664 cycles # 2.224 GHz - 2,405,748,439 instructions # 1.76 insn per cycle - 0.616101703 seconds time elapsed + 1,500,885,532 cycles # 2.396 GHz + 2,536,856,422 instructions # 1.69 insn per cycle + 0.627161657 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 535) (512y: 60) (512z: 1006) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 4cba4c8f17..854849f5b9 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-07-23_17:01:53 +DATE: 2024-08-08_20:37:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.067979e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.299959e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.734201e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.471582e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.082860e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.730798e+09 ) sec^-1 MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.481457 sec +TOTAL : 0.477544 sec INFO: No Floating Point Exceptions have been reported - 2,093,279,906 cycles # 2.954 GHz - 2,954,954,358 instructions # 1.41 insn per cycle - 0.765733186 seconds time elapsed + 2,060,886,859 cycles # 2.928 GHz + 2,892,344,882 instructions # 1.40 insn per cycle + 0.762313323 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 100 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -82,15 +82,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.379309e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.076034e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076034e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.384427e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077691e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077691e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.204698 sec +TOTAL : 1.212857 sec INFO: No Floating Point Exceptions have been reported - 3,643,752,944 cycles # 3.013 GHz - 9,596,045,630 instructions # 2.63 insn per cycle - 1.210140981 seconds time elapsed + 3,671,434,294 cycles # 3.013 GHz + 9,632,126,320 instructions # 2.62 insn per cycle + 1.219246655 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 462) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe @@ -111,15 +111,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.299837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.484212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.484212e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.313604e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.570590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570590e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.542592 sec +TOTAL : 0.557914 sec INFO: No Floating Point Exceptions have been reported - 1,633,403,655 cycles # 2.984 GHz - 3,962,789,991 instructions # 2.43 insn per cycle - 0.548005774 seconds time elapsed + 1,698,515,028 cycles # 3.014 GHz + 3,997,527,782 instructions # 2.35 insn per cycle + 0.564171143 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1578) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe @@ -140,15 +140,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.139117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.581885e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.581885e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.069297e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.474961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.474961e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.419606 sec +TOTAL : 0.435063 sec INFO: No Floating Point Exceptions have been reported - 1,254,162,397 cycles # 2.954 GHz - 2,493,681,375 instructions # 1.99 insn per cycle - 0.425051478 seconds time elapsed + 1,286,599,575 cycles # 2.919 GHz + 2,528,332,939 instructions # 1.97 insn per cycle + 0.441354656 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1910) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe @@ -169,15 +169,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.138494e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.665958e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.665958e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
3.180191e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.819453e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.819453e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.421327 sec +TOTAL : 0.425326 sec INFO: No Floating Point Exceptions have been reported - 1,237,307,699 cycles # 2.903 GHz - 2,467,612,553 instructions # 1.99 insn per cycle - 0.426840225 seconds time elapsed + 1,261,525,072 cycles # 2.926 GHz + 2,504,983,030 instructions # 1.99 insn per cycle + 0.431704777 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1855) (512y: 1) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe @@ -198,15 +198,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.027449e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.114286e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.114286e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.850782e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.787254e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.787254e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.433178 sec +TOTAL : 0.464725 sec INFO: No Floating Point Exceptions have been reported - 1,076,863,966 cycles # 2.460 GHz - 2,071,125,855 instructions # 1.92 insn per cycle - 0.438611419 seconds time elapsed + 1,108,955,129 cycles # 2.357 GHz + 2,107,952,878 instructions # 1.90 insn per cycle + 0.471172185 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1039) (512y: 5) (512z: 1290) ------------------------------------------------------------------------- runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 4760459d1f..24f2cc254b 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-07-23_17:02:04 +DATE: 2024-08-08_20:38:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.067489e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.259235e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.713015e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.481519e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.098490e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.734508e+09 ) sec^-1 MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.484584 sec +TOTAL : 0.480270 sec INFO: No Floating Point Exceptions have been reported - 2,081,403,027 cycles # 2.924 GHz - 2,972,594,980 instructions # 1.43 insn per cycle - 0.768624569 seconds time elapsed + 2,041,258,883 cycles # 2.865 GHz + 2,919,368,257 instructions # 1.43 insn per cycle + 0.770727877 seconds 
time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 93 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -82,15 +82,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.423311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.085961e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.085961e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.423477e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.084213e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.084213e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.198062 sec +TOTAL : 1.208276 sec INFO: No Floating Point Exceptions have been reported - 3,620,156,629 cycles # 3.009 GHz - 9,465,393,926 instructions # 2.61 insn per cycle - 1.203587670 seconds time elapsed + 3,647,443,455 cycles # 3.005 GHz + 9,504,212,055 instructions # 2.61 insn per cycle + 1.214581993 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 366) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe @@ -111,15 +111,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.238505e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.360648e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.360648e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.204450e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.296384e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296384e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.554290 sec +TOTAL : 0.572123 sec INFO: No Floating Point Exceptions have been reported - 1,633,916,845 cycles # 2.922 GHz - 3,928,828,269 instructions # 2.40 insn per cycle - 0.559791116 seconds time elapsed + 1,666,311,430 cycles # 2.883 GHz + 3,968,199,942 instructions # 2.38 insn per cycle + 0.578517715 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1516) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe @@ -140,15 +140,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.121156e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.545020e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.545020e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.086457e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.476966e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.476966e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.421088 sec +TOTAL : 0.433372 sec INFO: No Floating Point Exceptions have been reported - 1,254,249,816 cycles # 2.942 GHz - 2,477,993,318 instructions # 1.98 insn per cycle - 0.426883352 seconds time elapsed + 1,287,648,503 cycles # 2.933 GHz + 2,519,527,968 instructions # 1.96 insn per cycle + 0.439715000 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1801) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe @@ -169,15 +169,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.213732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.821522e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.821522e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.137610e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.760529e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.760529e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.410608 sec +TOTAL : 0.429722 sec INFO: No Floating Point Exceptions have been reported - 1,222,985,428 cycles # 2.944 GHz - 2,454,854,040 instructions # 2.01 insn per cycle - 0.416044203 seconds time elapsed + 1,269,495,412 cycles # 2.915 GHz + 2,496,260,070 instructions # 1.97 insn per cycle + 0.436264737 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1764) (512y: 1) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe @@ -198,15 +198,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.076410e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.296730e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.296730e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.044380e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.291761e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.291761e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.427614 sec +TOTAL : 0.438334 sec INFO: No Floating Point Exceptions have been reported - 1,070,051,378 cycles # 2.475 GHz - 2,054,834,381 instructions # 1.92 insn per cycle - 0.433034280 seconds time elapsed + 1,106,020,121 cycles # 2.491 GHz + 2,096,224,924 instructions # 1.90 insn per cycle + 0.444840756 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 934) (512y: 5) (512z: 1271) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index bd3bc8d8f9..097ec6962d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-07-23_17:02:15 +DATE: 2024-08-08_20:38:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.430459e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.319984e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.394007e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.657009e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.040901e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.368076e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.521551 sec +TOTAL : 0.510823 sec INFO: No Floating Point Exceptions have been reported - 2,263,626,755 cycles # 2.970 GHz - 3,140,513,974 instructions # 1.39 insn per cycle - 0.819288000 seconds time elapsed + 2,202,406,007 cycles # 2.933 GHz + 3,131,483,968 instructions # 1.42 insn per cycle + 0.809574698 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 132 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -82,15 +82,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.284880e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.058338e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.058338e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.987871e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027797e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027797e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.239757 sec +TOTAL : 1.312691 sec INFO: No Floating Point Exceptions have been reported - 3,778,922,988 cycles # 3.036 GHz - 9,745,451,778 instructions # 2.58 insn per cycle - 1.245537909 seconds time elapsed + 3,886,479,162 cycles # 2.942 GHz + 9,876,785,784 instructions # 2.54 insn per cycle + 1.321966236 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 338) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe @@ -111,15 +111,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.609344e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.086524e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.086524e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.603482e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.083956e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.083956e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.762267 sec +TOTAL : 0.795166 sec INFO: No Floating Point Exceptions have been reported - 2,282,992,204 cycles # 2.975 GHz - 5,912,624,923 instructions # 2.59 insn per cycle - 0.767997063 seconds time elapsed + 2,395,751,097 cycles # 2.981 GHz + 6,041,369,753 instructions # 2.52 insn per cycle + 0.804292816 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1409) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe @@ -140,15 +140,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.266801e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.350656e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.350656e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.333538e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.457835e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457835e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.572838 sec +TOTAL : 0.593950 sec INFO: No Floating Point Exceptions have been reported - 1,637,126,786 cycles # 2.833 GHz - 3,250,368,511 instructions # 1.99 insn per cycle - 0.578515372 seconds time elapsed + 1,751,397,279 cycles # 2.907 GHz + 3,381,419,349 instructions # 1.93 insn per cycle + 0.603155882 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1555) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe @@ -169,15 +169,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.415598e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.643733e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.643733e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
2.383716e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.579987e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.579987e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.544901 sec +TOTAL : 0.584649 sec INFO: No Floating Point Exceptions have been reported - 1,603,336,377 cycles # 2.915 GHz - 3,205,971,638 instructions # 2.00 insn per cycle - 0.550693078 seconds time elapsed + 1,722,820,866 cycles # 2.904 GHz + 3,335,061,421 instructions # 1.94 insn per cycle + 0.593900292 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1434) (512y: 101) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe @@ -198,15 +198,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.250649e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.279035e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.279035e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.223321e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.217067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.217067e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.576205 sec +TOTAL : 0.618111 sec INFO: No Floating Point Exceptions have been reported - 1,354,591,728 cycles # 2.330 GHz - 2,373,898,744 instructions # 1.75 insn per cycle - 0.581963295 seconds time elapsed + 1,474,024,650 cycles # 2.351 GHz + 2,505,057,782 instructions # 1.70 insn per cycle + 0.627415589 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 744) (512y: 64) (512z: 1062) ------------------------------------------------------------------------- runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index b203416aeb..909ea75534 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-07-23_17:02:27 +DATE: 2024-08-08_20:38:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,15 +49,15 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.443321e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.735482e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.752926e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.791313e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.626392e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.791667e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.524557 sec +TOTAL : 0.506993 sec INFO: No Floating Point Exceptions have been reported - 2,266,586,877 cycles # 2.953 GHz - 3,158,332,812 instructions # 1.39 insn per cycle - 0.824937952 seconds time elapsed + 2,160,282,873 cycles # 2.928 GHz + 3,104,863,193 instructions # 1.44 insn per cycle + 0.795042821 seconds 
time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -82,15 +82,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.343960e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.065936e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.065936e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.274915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.058342e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058342e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.230924 sec +TOTAL : 1.272460 sec INFO: No Floating Point Exceptions have been reported - 3,759,605,042 cycles # 3.042 GHz - 9,636,433,569 instructions # 2.56 insn per cycle - 1.236672615 seconds time elapsed + 3,870,727,422 cycles # 3.021 GHz + 9,766,927,758 instructions # 2.52 insn per cycle + 1.281884523 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 356) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe @@ -111,15 +111,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.591612e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.060590e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.060590e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.623095e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.126207e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.126207e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.770075 sec +TOTAL : 0.787281 sec INFO: No Floating Point Exceptions have been reported - 2,304,102,028 cycles # 2.973 GHz - 5,854,779,970 instructions # 2.54 insn per cycle - 0.775710054 seconds time elapsed + 2,408,985,457 cycles # 3.026 GHz + 5,983,716,153 instructions # 2.48 insn per cycle + 0.796654714 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1367) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe @@ -140,15 +140,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.314183e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.423375e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.423375e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.282374e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.352435e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352435e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.562294 sec +TOTAL : 0.601451 sec INFO: No Floating Point Exceptions have been reported - 1,652,087,916 cycles # 2.912 GHz - 3,213,928,099 instructions # 1.95 insn per cycle - 0.567903877 seconds time elapsed + 1,779,110,472 cycles # 2.917 GHz + 3,343,155,447 instructions # 1.88 insn per cycle + 0.610581817 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1471) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe @@ -169,15 +169,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.431496e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.634136e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.634136e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.404645e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.636849e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.636849e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.539794 sec +TOTAL : 0.577304 sec INFO: No Floating Point Exceptions have been reported - 1,597,382,114 cycles # 2.933 GHz - 3,178,138,631 instructions # 1.99 insn per cycle - 0.545290348 seconds time elapsed + 1,713,534,680 cycles # 2.924 GHz + 3,304,839,422 instructions # 1.93 insn per cycle + 0.586559957 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1370) (512y: 101) (512z: 0) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe @@ -198,15 +198,15 @@ Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inline Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.311837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.372038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.372038e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.274336e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.329961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.329961e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.562137 sec +TOTAL : 0.603476 sec INFO: No Floating Point Exceptions have been reported - 1,361,155,598 cycles # 2.400 GHz - 2,358,011,111 instructions # 1.73 insn per cycle - 0.567788379 seconds time elapsed + 1,481,795,981 cycles # 2.421 GHz + 2,484,912,045 instructions # 1.68 insn per cycle + 0.612779368 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 692) (512y: 64) (512z: 1053) ------------------------------------------------------------------------- runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 4b28d920f4..23a45578df 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:07:37 +DATE: 2024-08-08_20:35:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.987516e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185615e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.286924e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.006324e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190183e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.288100e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.528112 sec +TOTAL : 0.519336 sec INFO: No Floating Point Exceptions have been reported - 2,142,646,791 cycles # 2.810 GHz - 3,068,281,279 instructions # 1.43 insn per cycle - 0.820901730 seconds time elapsed + 2,213,490,510 cycles # 2.944 GHz + 3,142,609,105 instructions # 1.42 insn per cycle + 0.808787239 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.894025e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.942883e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.942883e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.848625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.896982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.896982e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.643752 sec +TOTAL : 5.805390 sec INFO: No Floating Point Exceptions have been reported - 17,226,404,935 cycles # 3.050 GHz - 45,931,312,380 instructions # 2.67 insn per cycle - 5.649207552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 636) (avx2: 0) (512y: 0) (512z: 0) + 17,322,328,356 cycles # 2.980 GHz + 46,027,314,744 instructions # 2.66 insn per cycle + 5.814672958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 623) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.306536e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.469427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.469427e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.232999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.394305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.394305e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.278748 sec +TOTAL : 3.377455 sec INFO: No Floating Point Exceptions have been reported - 9,984,585,810 cycles # 3.041 GHz - 27,808,479,846 instructions # 2.79 insn per cycle - 3.284501363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2548) (avx2: 0) (512y: 0) (512z: 0) + 10,089,219,468 cycles # 2.980 GHz + 27,901,985,402 instructions # 2.77 insn per cycle + 3.386689562 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.219913e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.629742e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.629742e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.131636e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.534601e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.534601e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.116298 sec +TOTAL : 2.174966 sec INFO: No Floating Point Exceptions have been reported - 6,086,077,866 cycles # 2.869 GHz - 12,591,046,550 instructions # 2.07 insn per cycle - 2.121982392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2696) (512y: 0) (512z: 0) + 6,180,272,446 cycles # 2.831 GHz + 12,679,670,239 instructions # 2.05 insn per 
cycle + 2.183950081 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2613) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.636641e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.104777e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.104777e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.604193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.099182e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.099182e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 1.966039 sec +TOTAL : 2.003125 sec INFO: No Floating Point Exceptions have been reported - 5,583,643,425 cycles # 2.833 GHz - 12,005,695,706 instructions # 2.15 insn per cycle - 1.971558570 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) 
(512y: 144) (512z: 0) + 5,696,944,820 cycles # 2.832 GHz + 12,097,133,291 instructions # 2.12 insn per cycle + 2.012150160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.702611e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.899100e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.899100e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.648289e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.842846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.842846e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.939991 sec +TOTAL : 3.006654 sec INFO: No Floating Point Exceptions have been reported - 5,757,651,640 cycles # 1.955 GHz - 8,345,980,239 instructions # 1.45 insn 
per cycle - 2.945651612 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1486) (512y: 122) (512z: 1805) + 5,848,300,882 cycles # 1.940 GHz + 8,438,808,313 instructions # 1.44 insn per cycle + 3.015775673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 122) (512z: 1805) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 474b872b4a..084acffe25 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:08:01 +DATE: 2024-08-08_20:35:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.968225e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178768e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279997e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.973192e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180411e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.278662e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.518790 sec +TOTAL : 0.518873 sec INFO: No Floating Point Exceptions have been reported - 2,218,888,564 cycles # 2.961 GHz - 3,191,991,067 instructions # 1.44 insn per cycle - 0.806488977 seconds time elapsed + 2,217,952,324 cycles # 2.952 GHz + 3,211,075,681 instructions # 1.45 insn per cycle + 0.807521486 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.948594e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.001763e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.001763e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.919771e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.971109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.971109e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.490922 sec +TOTAL : 5.589458 sec INFO: No Floating Point Exceptions have been reported - 16,707,703,822 cycles # 3.040 GHz - 44,917,267,122 instructions # 2.69 insn per cycle - 5.496949602 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 580) (avx2: 0) (512y: 0) (512z: 0) + 16,851,504,003 cycles # 3.011 GHz + 45,007,980,146 instructions # 2.67 insn per cycle + 5.597787166 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.418137e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.601353e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.601353e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.433331e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.615119e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.615119e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.176327 sec +TOTAL : 3.183428 sec INFO: No Floating Point Exceptions have been reported - 9,532,841,539 cycles # 2.997 GHz - 26,690,753,956 instructions # 2.80 insn per cycle - 3.181964222 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2342) (avx2: 0) (512y: 0) (512z: 0) + 9,605,830,601 cycles # 3.010 GHz + 26,781,992,422 instructions # 2.79 insn per cycle + 3.191879831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2330) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.794970e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.133255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.133255e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.719654e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.056760e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.056760e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.295112 sec +TOTAL : 2.350234 sec INFO: No Floating Point Exceptions have been reported - 6,599,473,152 cycles # 2.870 GHz - 14,114,597,861 instructions # 2.14 insn per cycle - 2.300710249 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2780) (512y: 0) (512z: 0) + 6,680,473,802 cycles # 2.833 GHz + 14,206,471,082 instructions # 2.13 insn per 
cycle + 2.358807267 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2697) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.970311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.333858e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.333858e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.858381e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.210770e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.210770e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.216523 sec +TOTAL : 2.286934 sec INFO: No Floating Point Exceptions have been reported - 6,338,453,582 cycles # 2.854 GHz - 13,709,424,623 instructions # 2.16 insn per cycle - 2.222146521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2436) 
(512y: 297) (512z: 0) + 6,467,572,645 cycles # 2.819 GHz + 13,805,117,271 instructions # 2.13 insn per cycle + 2.295500484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2348) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.500387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.675839e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.675839e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.556078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.738376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.738376e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.103428 sec +TOTAL : 3.078127 sec INFO: No Floating Point Exceptions have been reported - 5,945,105,969 cycles # 1.913 GHz - 10,105,639,078 instructions # 1.70 
insn per cycle - 3.108831696 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1336) (512y: 208) (512z: 1985) + 6,022,357,803 cycles # 1.952 GHz + 10,198,455,945 instructions # 1.69 insn per cycle + 3.086650563 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1306) (512y: 208) (512z: 1985) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index a66d6683a6..3eab9e9753 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:08:25 +DATE: 2024-08-08_20:35:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.559482e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.212410e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.403401e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.671843e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.219611e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398007e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.478047 sec +TOTAL : 0.483015 sec INFO: No Floating Point Exceptions have been reported - 2,106,213,142 cycles # 2.975 GHz - 3,003,323,992 instructions # 1.43 insn per cycle - 0.765175289 seconds time elapsed + 2,057,665,691 cycles # 2.919 GHz + 2,974,139,215 instructions # 1.45 insn per cycle + 0.763755746 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 149 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.002790e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.059233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.059233e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.976573e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.032296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.032296e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.322844 sec +TOTAL : 5.392550 sec INFO: No Floating Point Exceptions have been reported - 16,223,048,121 cycles # 3.046 GHz - 45,327,540,091 instructions # 2.79 insn per cycle - 5.327983233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 614) (avx2: 0) (512y: 0) (512z: 0) + 16,223,721,004 cycles # 3.006 GHz + 45,343,520,122 instructions # 2.79 insn per cycle + 5.398630583 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.698933e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.051306e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.051306e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.606915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.959618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.959618e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.317323 sec +TOTAL : 2.365944 sec INFO: No Floating Point Exceptions have been reported - 7,057,769,754 cycles # 3.040 GHz - 17,776,370,522 instructions # 2.52 insn per cycle - 2.322507956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3154) (avx2: 0) (512y: 0) (512z: 0) + 7,142,483,054 cycles # 3.012 GHz + 17,793,150,450 instructions # 2.49 insn per cycle + 2.371767516 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.614657e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.810162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.810162e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.534145e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.726326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.726326e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.301003 sec +TOTAL : 1.317221 sec INFO: No Floating Point Exceptions have been reported - 3,749,560,002 cycles # 2.872 GHz - 8,268,310,663 instructions # 2.21 insn per cycle - 1.306245545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3379) (512y: 0) (512z: 0) + 3,766,549,622 cycles # 2.849 GHz + 8,281,231,591 instructions # 2.20 insn per 
cycle + 1.323030863 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3355) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.173728e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.054118e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.054118e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.037857e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.038500e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.038500e+06 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.229272 sec +TOTAL : 1.247672 sec INFO: No Floating Point Exceptions have been reported - 3,562,944,302 cycles # 2.888 GHz - 7,923,374,917 instructions # 2.22 insn per cycle - 1.234483154 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3231) 
(512y: 20) (512z: 0) + 3,572,380,687 cycles # 2.852 GHz + 7,938,220,748 instructions # 2.22 insn per cycle + 1.253461191 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3201) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.833328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.525856e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.525856e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.780907e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.464899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.464899e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.620133 sec +TOTAL : 1.635161 sec INFO: No Floating Point Exceptions have been reported - 3,261,172,355 cycles # 2.008 GHz - 6,104,371,418 instructions # 1.87 insn 
per cycle - 1.625447082 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2407) (512y: 24) (512z: 2153) + 3,277,760,479 cycles # 1.999 GHz + 6,118,650,971 instructions # 1.87 insn per cycle + 1.640889669 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2294) (512y: 24) (512z: 2154) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 01cbb3ba00..95f2f81a67 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:08:45 +DATE: 2024-08-08_20:36:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.977153e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.485991e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.727696e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.014048e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.487826e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.715050e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.479625 sec +TOTAL : 0.479773 sec INFO: No Floating Point Exceptions have been reported - 2,087,735,706 cycles # 2.950 GHz - 2,974,984,458 instructions # 1.42 insn per cycle - 0.766158623 seconds time elapsed + 2,021,404,320 cycles # 2.871 GHz + 2,909,718,804 instructions # 1.44 insn per cycle + 0.763747586 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032109e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.091836e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.091836e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.015289e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.073220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.073220e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.260965 sec +TOTAL : 5.290195 sec INFO: No Floating Point Exceptions have been reported - 15,970,657,452 cycles # 3.043 GHz - 44,436,012,793 instructions # 2.78 insn per cycle - 5.266053329 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 547) (avx2: 0) (512y: 0) (512z: 0) + 15,992,452,194 cycles # 3.020 GHz + 44,447,001,670 instructions # 2.78 insn per cycle + 5.296101650 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.469324e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.951349e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.951349e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.486417e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.979858e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.979858e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.003414 sec +TOTAL : 2.001515 sec INFO: No Floating Point Exceptions have been reported - 6,069,324,654 cycles # 3.023 GHz - 17,077,758,378 instructions # 2.81 insn per cycle - 2.008459288 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2881) (avx2: 0) (512y: 0) (512z: 0) + 6,083,399,365 cycles # 3.032 GHz + 17,096,762,778 instructions # 2.81 insn per cycle + 2.007478242 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.186571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.792405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.792405e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.273384e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.901765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.901765e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.779064 sec +TOTAL : 1.760820 sec INFO: No Floating Point Exceptions have been reported - 5,029,323,256 cycles # 2.820 GHz - 10,228,128,275 instructions # 2.03 insn per cycle - 1.784210959 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3916) (512y: 0) (512z: 0) + 5,038,046,690 cycles # 2.853 GHz + 10,244,068,560 instructions # 2.03 insn per 
cycle + 1.766743334 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3892) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.269492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.881858e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.881858e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.352422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.995021e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.995021e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.756710 sec +TOTAL : 1.739024 sec INFO: No Floating Point Exceptions have been reported - 4,975,750,592 cycles # 2.825 GHz - 9,998,359,521 instructions # 2.01 insn per cycle - 1.761821685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3823) 
(512y: 2) (512z: 0) + 4,995,379,501 cycles # 2.864 GHz + 10,014,742,907 instructions # 2.00 insn per cycle + 1.744931983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3793) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.927584e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.287376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.287376e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.909740e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.260066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.260066e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 2.212094 sec +TOTAL : 2.224170 sec INFO: No Floating Point Exceptions have been reported - 4,375,301,070 cycles # 1.974 GHz - 8,447,130,711 instructions # 1.93 insn per 
cycle - 2.217463579 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 4) (512z: 2751) + 4,384,022,767 cycles # 1.967 GHz + 8,465,829,971 instructions # 1.93 insn per cycle + 2.230123024 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2782) (512y: 4) (512z: 2752) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 52252cbfc7..3f2b21ab02 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:09:06 +DATE: 2024-08-08_20:36:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.991553e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180542e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.281710e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.111342e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183781e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280569e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.521706 sec +TOTAL : 0.516736 sec INFO: No Floating Point Exceptions have been reported - 2,224,165,682 cycles # 2.953 GHz - 3,203,188,534 instructions # 1.44 insn per cycle - 0.810337574 seconds time elapsed + 2,204,839,521 cycles # 2.950 GHz + 3,193,475,947 instructions # 1.45 insn per cycle + 0.804039579 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.871017e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.918846e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.918846e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.851387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.898716e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898716e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.711061 sec +TOTAL : 5.792449 sec INFO: No Floating Point Exceptions have been reported - 17,404,987,337 cycles # 3.045 GHz - 46,083,110,316 instructions # 2.65 insn per cycle - 5.716653594 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 636) (avx2: 0) (512y: 0) (512z: 0) + 17,478,048,232 cycles # 3.014 GHz + 46,175,878,133 instructions # 2.64 insn per cycle + 5.800949907 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 623) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.312498e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.481884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.481884e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.302826e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.471365e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471365e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.272999 sec +TOTAL : 3.305610 sec INFO: No Floating Point Exceptions have been reported - 9,986,410,862 cycles # 3.047 GHz - 27,601,244,510 instructions # 2.76 insn per cycle - 3.278531877 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2593) (avx2: 0) (512y: 0) (512z: 0) + 10,029,884,170 cycles # 3.027 GHz + 27,698,012,954 instructions # 2.76 insn per cycle + 3.314264877 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.173705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.573635e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.573635e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.212203e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.631040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.631040e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.134679 sec +TOTAL : 2.141280 sec INFO: No Floating Point Exceptions have been reported - 6,031,616,440 cycles # 2.820 GHz - 12,494,854,133 instructions # 2.07 insn per cycle - 2.140101571 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2783) (512y: 0) (512z: 0) + 6,126,755,092 cycles # 2.851 GHz + 12,585,784,837 instructions # 2.05 insn per 
cycle + 2.149799113 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2765) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.444634e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.896261e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.896261e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.714807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.220314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.220314e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.032418 sec +TOTAL : 1.966130 sec INFO: No Floating Point Exceptions have been reported - 5,512,811,090 cycles # 2.706 GHz - 11,931,754,486 instructions # 2.16 insn per cycle - 2.038065532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2534) 
(512y: 146) (512z: 0) + 5,614,473,659 cycles # 2.844 GHz + 12,019,662,665 instructions # 2.14 insn per cycle + 1.974902809 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2510) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.605249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.805954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.805954e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.735274e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.937488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.937488e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.017835 sec +TOTAL : 2.937106 sec INFO: No Floating Point Exceptions have been reported - 5,646,843,336 cycles # 1.868 GHz - 8,120,170,284 instructions # 1.44 insn 
per cycle - 3.023662601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1668) (512y: 126) (512z: 1865) + 5,684,383,017 cycles # 1.930 GHz + 8,211,471,869 instructions # 1.44 insn per cycle + 2.945845267 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1646) (512y: 126) (512z: 1865) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index d52539d8e7..9ec77e6c2c 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) -OMPFLAGS=-fopenmp +OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' @@ -40,7 +40,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-06-28_21:09:31 +DATE: 2024-08-08_20:37:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -49,21 +49,23 @@ INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.896359e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171747e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273533e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.087294e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176774e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273815e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.523837 sec +TOTAL : 0.521745 sec INFO: No Floating Point Exceptions have been reported - 2,210,261,649 cycles # 2.923 GHz - 3,098,934,216 instructions # 1.40 insn per cycle - 0.814091711 seconds time elapsed + 2,190,333,356 cycles # 2.907 GHz + 3,117,272,451 instructions # 1.42 insn per cycle + 0.811246203 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 @@ -80,20 +82,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960095e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960095e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.899666e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.949679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949679e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.598303 sec +TOTAL : 5.649808 sec INFO: No Floating Point Exceptions have been reported - 16,960,347,258 cycles # 3.027 GHz - 45,101,575,966 instructions # 2.66 insn per cycle - 5.604097252 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 581) (avx2: 0) (512y: 0) (512z: 0) + 17,042,397,704 cycles # 3.012 GHz + 45,200,059,180 instructions # 2.65 insn per cycle + 5.658309716 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -108,20 +111,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.386759e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560207e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560207e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.442760e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623868e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.623868e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.207946 sec +TOTAL : 3.175173 sec INFO: No Floating Point Exceptions have been reported - 9,517,840,011 cycles # 2.994 GHz - 26,245,772,623 instructions # 2.76 insn per cycle - 3.215418509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2397) (avx2: 0) (512y: 0) (512z: 0) + 9,616,707,948 cycles # 3.021 GHz + 26,345,303,385 instructions # 2.74 insn per cycle + 3.183844820 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2385) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -136,20 +140,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.136448e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.430910e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.430910e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.409096e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.707370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.707370e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.672758 sec +TOTAL : 2.509673 sec INFO: No Floating Point Exceptions have been reported - 6,760,215,182 cycles # 2.667 GHz - 14,040,901,808 instructions # 2.08 insn per cycle - 2.679767706 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2901) (512y: 0) (512z: 0) + 6,823,505,729 cycles # 2.711 GHz + 14,133,345,545 instructions # 2.07 insn per 
cycle + 2.518344311 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2883) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -164,20 +169,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.448322e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.786078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.786078e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.915857e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.278986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.278986e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.482950 sec +TOTAL : 2.261621 sec INFO: No Floating Point Exceptions have been reported - 6,405,645,930 cycles # 2.679 GHz - 13,525,136,384 instructions # 2.11 insn per cycle - 2.488598452 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2543) 
(512y: 302) (512z: 0) + 6,478,665,786 cycles # 2.855 GHz + 13,612,638,339 instructions # 2.10 insn per cycle + 2.270008014 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2519) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 @@ -192,20 +198,21 @@ Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHe Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.416786e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592951e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592951e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.779798e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.989152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.989152e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.180333 sec +TOTAL : 2.903794 sec INFO: No Floating Point Exceptions have been reported - 5,593,468,380 cycles # 1.770 GHz - 9,219,345,276 instructions # 1.65 insn 
per cycle - 3.186249051 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1453) (512y: 212) (512z: 2058) + 5,684,727,855 cycles # 1.953 GHz + 9,307,942,112 instructions # 1.64 insn per cycle + 2.912446958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1431) (512y: 212) (512z: 2058) ------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 3 tests. +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 diff --git a/tools/mg-clang-format/mg-clang-format b/tools/mg-clang-format/mg-clang-format index 37ae30d79b..9f36462da3 100755 --- a/tools/mg-clang-format/mg-clang-format +++ b/tools/mg-clang-format/mg-clang-format @@ -15,22 +15,24 @@ else fi ###echo clangVersion=$clangVersion -if [ ${clangVersion} -ge 13 ]; then +if [ ${clangVersion} -ge 13 ] && [ ${clangVersion} -le 15 ]; then exec clang-format "$@" else if [ ! -d /cvmfs/sft.cern.ch/lcg/releases/clang ]; then - echo "ERROR! clang-format version >= 13 is not installed and /cvmfs/sft.cern.ch/lcg/releases/clang is not reachable" + echo "ERROR! 
clang-format version >= 13 and <= 15 is not installed and /cvmfs/sft.cern.ch/lcg/releases/clang is not reachable" exit 1 fi redrel=$(cat /etc/redhat-release 2> /dev/null) if [ "${redrel##*release 7}" != "${redrel}" ]; then - clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/13.0.1-721c8/x86_64-centos7 + ###clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/13.0.1-721c8/x86_64-centos7 + clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/15.0.7-27d6b/x86_64-centos7 elif [ "${redrel##*release 8}" != "${redrel}" ]; then clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/13.0.1-721c8/x86_64-centos8 elif [ "${redrel##*release 9}" != "${redrel}" ]; then - clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/14.0.6-14bdb/x86_64-centos9 + ###clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/14.0.6-14bdb/x86_64-centos9 + clangDir=/cvmfs/sft.cern.ch/lcg/releases/clang/15.0.7-27d6b/x86_64-el9 else - echo "ERROR! clang-format version >= 13 is not installed and RedHat release could not be identified (${redrel})" + echo "ERROR! clang-format version >= 13 and <= 15 is not installed and RedHat release could not be identified (${redrel})" exit 1 fi source ${clangDir}/setup.sh